[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251104170305.4163840-21-terry.bowman@amd.com>
Date: Tue, 4 Nov 2025 11:03:00 -0600
From: Terry Bowman <terry.bowman@....com>
To: <dave@...olabs.net>, <jonathan.cameron@...wei.com>,
<dave.jiang@...el.com>, <alison.schofield@...el.com>,
<dan.j.williams@...el.com>, <bhelgaas@...gle.com>, <shiju.jose@...wei.com>,
<ming.li@...omail.com>, <Smita.KoralahalliChannabasappa@....com>,
<rrichter@....com>, <dan.carpenter@...aro.org>,
<PradeepVineshReddy.Kodamati@....com>, <lukas@...ner.de>,
<Benjamin.Cheatham@....com>, <sathyanarayanan.kuppuswamy@...ux.intel.com>,
<linux-cxl@...r.kernel.org>, <alucerop@....com>, <ira.weiny@...el.com>
CC: <linux-kernel@...r.kernel.org>, <linux-pci@...r.kernel.org>,
<terry.bowman@....com>
Subject: [RESEND v13 20/25] CXL/PCI: Introduce CXL Port protocol error handlers
Add CXL protocol error handlers for CXL Port devices (Root Ports,
Downstream Ports, and Upstream Ports). Implement cxl_port_cor_error_detected()
and cxl_port_error_detected() to handle correctable and uncorrectable errors
respectively.
Introduce cxl_get_ras_base() to retrieve the cached RAS register base
address for a given CXL port. This function supports CXL Root Ports,
Downstream Ports, and Upstream Ports by returning their previously mapped
RAS register addresses.
Add device lock assertions to protect against concurrent device or RAS
register removal during error handling. The port error handlers require
two device locks:
1. The port's CXL parent device - RAS registers are mapped using devm_*
functions with the parent port as the host. Locking the parent prevents
the RAS registers from being unmapped during error handling.
2. The PCI device (pdev->dev) - Locking prevents concurrent modifications
to the PCI device structure during error handling.
The lock assertions added here will be satisfied by device locks introduced
in a subsequent patch.
Introduce get_pci_cxl_host_dev() to return the device responsible for
managing the RAS register mapping. This function increments the reference
count on the host device to prevent premature resource release during error
handling. The caller is responsible for decrementing the reference count.
For CXL endpoints, which manage resources without a separate host device,
this function returns NULL.
Update the AER driver's is_cxl_error() to recognize CXL Port devices in
addition to CXL Endpoints, as both now have CXL-specific error handlers.
Signed-off-by: Terry Bowman <terry.bowman@....com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@...wei.com>
Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@...ux.intel.com>
---
Changes in v12->v13:
- Move get_pci_cxl_host_dev() and cxl_handle_proto_error() to Dequeue
patch (Terry)
- Remove EP case in cxl_get_ras_base(), not used. (Terry)
- Remove check for dport->dport_dev (Dave)
- Remove whitespace (Terry)
Changes in v11->v12:
- Add call to cxl_pci_drv_bound() in cxl_handle_proto_error() and
pci_to_cxl_dev()
- Change cxl_error_detected() -> cxl_cor_error_detected()
- Remove NULL variable assignments
- Replace bus_find_device() with find_cxl_port_by_uport() for upstream
port searches.
Changes in v10->v11:
- None
---
drivers/cxl/core/core.h | 10 +++++++
drivers/cxl/core/port.c | 7 ++---
drivers/cxl/core/ras.c | 49 +++++++++++++++++++++++++++++++++++
drivers/pci/pcie/aer_cxl_vh.c | 5 +++-
4 files changed, 67 insertions(+), 4 deletions(-)
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index b2c0ccd6803f..046ec65ed147 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -157,6 +157,8 @@ void cxl_cor_error_detected(struct device *dev);
pci_ers_result_t pci_error_detected(struct pci_dev *pdev,
pci_channel_state_t error);
void pci_cor_error_detected(struct pci_dev *pdev);
+pci_ers_result_t cxl_port_error_detected(struct device *dev);
+void cxl_port_cor_error_detected(struct device *dev);
#else
static inline int cxl_ras_init(void)
{
@@ -176,6 +178,11 @@ static inline pci_ers_result_t pci_error_detected(struct pci_dev *pdev,
return PCI_ERS_RESULT_NONE;
}
static inline void pci_cor_error_detected(struct pci_dev *pdev) { }
+static inline void cxl_port_cor_error_detected(struct device *dev) { }
+static inline pci_ers_result_t cxl_port_error_detected(struct device *dev)
+{
+ return PCI_ERS_RESULT_NONE;
+}
#endif /* CONFIG_CXL_RAS */
/* Restricted CXL Host specific RAS functions */
@@ -190,6 +197,9 @@ static inline void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { }
#endif /* CONFIG_CXL_RCH_RAS */
int cxl_gpf_port_setup(struct cxl_dport *dport);
+struct cxl_port *find_cxl_port(struct device *dport_dev,
+ struct cxl_dport **dport);
+struct cxl_port *find_cxl_port_by_uport(struct device *uport_dev);
struct cxl_hdm;
int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index b70e1b505b5c..d060f864cf2e 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -1360,8 +1360,8 @@ static struct cxl_port *__find_cxl_port(struct cxl_find_port_ctx *ctx)
return NULL;
}
-static struct cxl_port *find_cxl_port(struct device *dport_dev,
- struct cxl_dport **dport)
+struct cxl_port *find_cxl_port(struct device *dport_dev,
+ struct cxl_dport **dport)
{
struct cxl_find_port_ctx ctx = {
.dport_dev = dport_dev,
@@ -1564,7 +1564,7 @@ static int match_port_by_uport(struct device *dev, const void *data)
* Function takes a device reference on the port device. Caller should do a
* put_device() when done.
*/
-static struct cxl_port *find_cxl_port_by_uport(struct device *uport_dev)
+struct cxl_port *find_cxl_port_by_uport(struct device *uport_dev)
{
struct device *dev;
@@ -1573,6 +1573,7 @@ static struct cxl_port *find_cxl_port_by_uport(struct device *uport_dev)
return to_cxl_port(dev);
return NULL;
}
+EXPORT_SYMBOL_NS_GPL(find_cxl_port_by_uport, "CXL");
static int update_decoder_targets(struct device *dev, void *data)
{
diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
index beb142054bda..142ca8794107 100644
--- a/drivers/cxl/core/ras.c
+++ b/drivers/cxl/core/ras.c
@@ -145,6 +145,39 @@ static void cxl_dport_map_ras(struct cxl_dport *dport)
dev_dbg(dev, "Failed to map RAS capability.\n");
}
+static void __iomem *cxl_get_ras_base(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ switch (pci_pcie_type(pdev)) {
+ case PCI_EXP_TYPE_ROOT_PORT:
+ case PCI_EXP_TYPE_DOWNSTREAM:
+ {
+ struct cxl_dport *dport;
+ struct cxl_port *port __free(put_cxl_port) = find_cxl_port(&pdev->dev, &dport);
+
+ if (!dport) {
+ pci_err(pdev, "Failed to find the CXL device");
+ return NULL;
+ }
+ return dport->regs.ras;
+ }
+ case PCI_EXP_TYPE_UPSTREAM:
+ {
+ struct cxl_port *port __free(put_cxl_port) = find_cxl_port_by_uport(&pdev->dev);
+
+ if (!port) {
+ pci_err(pdev, "Failed to find the CXL device");
+ return NULL;
+ }
+ return port->uport_regs.ras;
+ }
+ }
+
+ dev_warn_once(dev, "Error: Unsupported device type (%X)", pci_pcie_type(pdev));
+ return NULL;
+}
+
/**
* cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport
* @dport: the cxl_dport that needs to be initialized
@@ -254,6 +287,22 @@ pci_ers_result_t cxl_handle_ras(struct device *dev, u64 serial, void __iomem *ra
return PCI_ERS_RESULT_PANIC;
}
+void cxl_port_cor_error_detected(struct device *dev)
+{
+ void __iomem *ras_base = cxl_get_ras_base(dev);
+
+ cxl_handle_cor_ras(dev, 0, ras_base);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_port_cor_error_detected, "CXL");
+
+pci_ers_result_t cxl_port_error_detected(struct device *dev)
+{
+ void __iomem *ras_base = cxl_get_ras_base(dev);
+
+ return cxl_handle_ras(dev, 0, ras_base);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_port_error_detected, "CXL");
+
void cxl_cor_error_detected(struct device *dev)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
diff --git a/drivers/pci/pcie/aer_cxl_vh.c b/drivers/pci/pcie/aer_cxl_vh.c
index 5dbc81341dc4..25f9512b57f7 100644
--- a/drivers/pci/pcie/aer_cxl_vh.c
+++ b/drivers/pci/pcie/aer_cxl_vh.c
@@ -43,7 +43,10 @@ bool is_cxl_error(struct pci_dev *pdev, struct aer_err_info *info)
if (!info || !info->is_cxl)
return false;
- if (pci_pcie_type(pdev) != PCI_EXP_TYPE_ENDPOINT)
+ if ((pci_pcie_type(pdev) != PCI_EXP_TYPE_ENDPOINT) &&
+ (pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT) &&
+ (pci_pcie_type(pdev) != PCI_EXP_TYPE_UPSTREAM) &&
+ (pci_pcie_type(pdev) != PCI_EXP_TYPE_DOWNSTREAM))
return false;
return is_internal_error(info);
--
2.34.1
Powered by blists - more mailing lists