[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250327014717.2988633-14-terry.bowman@amd.com>
Date: Wed, 26 Mar 2025 20:47:14 -0500
From: Terry Bowman <terry.bowman@....com>
To: <linux-cxl@...r.kernel.org>, <linux-kernel@...r.kernel.org>,
<linux-pci@...r.kernel.org>, <nifan.cxl@...il.com>, <dave@...olabs.net>,
<jonathan.cameron@...wei.com>, <dave.jiang@...el.com>,
<alison.schofield@...el.com>, <vishal.l.verma@...el.com>,
<dan.j.williams@...el.com>, <bhelgaas@...gle.com>, <mahesh@...ux.ibm.com>,
<ira.weiny@...el.com>, <oohall@...il.com>, <Benjamin.Cheatham@....com>,
<rrichter@....com>, <nathan.fontenot@....com>, <terry.bowman@....com>,
<Smita.KoralahalliChannabasappa@....com>, <lukas@...ner.de>,
<ming.li@...omail.com>, <PradeepVineshReddy.Kodamati@....com>
Subject: [PATCH v8 13/16] cxl/pci: Assign CXL Endpoint protocol error handlers
CXL Endpoint protocol errors are currently handled using PCI error
handlers. CXL Endpoints require CXL-specific handling of uncorrectable
errors, which the PCI handlers do not provide.
Add CXL-specific handlers for CXL Endpoints. Assign the CXL handlers
during Endpoint Port initialization.
Keep the PCI Endpoint handlers. The PCI handlers can still be called if
the CXL device is not trained for the alternate protocol (CXL). Update the
CXL Endpoint PCI handlers to call the corresponding CXL handlers. If the
CXL uncorrectable handler returns PCI_ERS_RESULT_PANIC, then the PCI
handler invokes panic().
Signed-off-by: Terry Bowman <terry.bowman@....com>
---
drivers/cxl/core/pci.c | 65 ++++++++++++++++++++++++------------------
drivers/cxl/cxl.h | 5 ++++
drivers/cxl/cxlpci.h | 4 +--
drivers/cxl/pci.c | 8 +++---
drivers/cxl/port.c | 7 +++++
5 files changed, 56 insertions(+), 33 deletions(-)
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 9ed6f700e132..f2139b382839 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -852,10 +852,10 @@ static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { }
#endif
-void cxl_cor_error_detected(struct pci_dev *pdev)
+void cxl_cor_error_detected(struct device *dev, struct cxl_prot_error_info *err_info)
{
+ struct pci_dev *pdev = err_info->pdev;
struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
- struct device *dev = &cxlds->cxlmd->dev;
scoped_guard(device, dev) {
if (!dev->driver) {
@@ -873,20 +873,30 @@ void cxl_cor_error_detected(struct pci_dev *pdev)
}
EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL");
-pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
- pci_channel_state_t state)
+void pci_cor_error_detected(struct pci_dev *pdev)
+{
+ struct cxl_prot_error_info err_info;
+
+ if (cxl_create_prot_err_info(pdev, AER_CORRECTABLE, &err_info))
+ return;
+
+ cxl_cor_error_detected(err_info.dev, &err_info);
+}
+EXPORT_SYMBOL_NS_GPL(pci_cor_error_detected, "CXL");
+
+pci_ers_result_t cxl_error_detected(struct device *dev,
+ struct cxl_prot_error_info *err_info)
{
- struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
- struct cxl_memdev *cxlmd = cxlds->cxlmd;
- struct device *dev = &cxlmd->dev;
bool ue;
+ struct pci_dev *pdev = err_info->pdev;
+ struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
scoped_guard(device, dev) {
if (!dev->driver) {
dev_warn(&pdev->dev,
"%s: memdev disabled, abort error handling\n",
dev_name(dev));
- return PCI_ERS_RESULT_DISCONNECT;
+ return PCI_ERS_RESULT_PANIC;
}
if (cxlds->rcd)
@@ -900,29 +910,30 @@ pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
ue = cxl_handle_endpoint_ras(cxlds);
}
+ if (ue)
+ return PCI_ERS_RESULT_PANIC;
- switch (state) {
- case pci_channel_io_normal:
- if (ue) {
- device_release_driver(dev);
- return PCI_ERS_RESULT_NEED_RESET;
- }
- return PCI_ERS_RESULT_CAN_RECOVER;
- case pci_channel_io_frozen:
- dev_warn(&pdev->dev,
- "%s: frozen state error detected, disable CXL.mem\n",
- dev_name(dev));
- device_release_driver(dev);
- return PCI_ERS_RESULT_NEED_RESET;
- case pci_channel_io_perm_failure:
- dev_warn(&pdev->dev,
- "failure state error detected, request disconnect\n");
- return PCI_ERS_RESULT_DISCONNECT;
- }
- return PCI_ERS_RESULT_NEED_RESET;
+ return PCI_ERS_RESULT_CAN_RECOVER;
}
EXPORT_SYMBOL_NS_GPL(cxl_error_detected, "CXL");
+pci_ers_result_t pci_error_detected(struct pci_dev *pdev,
+ pci_channel_state_t error)
+{
+ struct cxl_prot_error_info err_info;
+ pci_ers_result_t rc;
+
+ if (cxl_create_prot_err_info(pdev, AER_FATAL, &err_info))
+ return PCI_ERS_RESULT_DISCONNECT;
+
+ rc = cxl_error_detected(err_info.dev, &err_info);
+ if (rc == PCI_ERS_RESULT_PANIC)
+ panic("CXL cachemem error.");
+
+ return rc;
+}
+EXPORT_SYMBOL_NS_GPL(pci_error_detected, "CXL");
+
static int cxl_flit_size(struct pci_dev *pdev)
{
if (cxl_pci_flit_256(pdev))
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 512cc38892ed..c1adf8a3cb9e 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -815,6 +815,11 @@ void cxl_port_cor_error_detected(struct device *dev,
pci_ers_result_t cxl_port_error_detected(struct device *dev,
struct cxl_prot_error_info *err_info);
+void cxl_cor_error_detected(struct device *dev,
+ struct cxl_prot_error_info *err_info);
+pci_ers_result_t cxl_error_detected(struct device *dev,
+ struct cxl_prot_error_info *err_info);
+
/**
* struct cxl_endpoint_dvsec_info - Cached DVSEC info
* @mem_enabled: cached value of mem_enabled in the DVSEC at init time
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index 92d72c0423ab..d277cf048eba 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -133,8 +133,8 @@ struct cxl_dev_state;
int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
struct cxl_endpoint_dvsec_info *info);
void read_cdat_data(struct cxl_port *port);
-void cxl_cor_error_detected(struct pci_dev *pdev);
-pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
+void pci_cor_error_detected(struct pci_dev *pdev);
+pci_ers_result_t pci_error_detected(struct pci_dev *pdev,
pci_channel_state_t state);
int cxl_create_prot_err_info(struct pci_dev *_pdev, int severity,
struct cxl_prot_error_info *err_info);
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 4288f4814cc5..c5be4422748e 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -1108,11 +1108,11 @@ static void cxl_reset_done(struct pci_dev *pdev)
}
}
-static const struct pci_error_handlers cxl_error_handlers = {
- .error_detected = cxl_error_detected,
+static const struct pci_error_handlers pci_error_handlers = {
+ .error_detected = pci_error_detected,
.slot_reset = cxl_slot_reset,
.resume = cxl_error_resume,
- .cor_error_detected = cxl_cor_error_detected,
+ .cor_error_detected = pci_cor_error_detected,
.reset_done = cxl_reset_done,
};
@@ -1120,7 +1120,7 @@ static struct pci_driver cxl_pci_driver = {
.name = KBUILD_MODNAME,
.id_table = cxl_mem_pci_tbl,
.probe = cxl_pci_probe,
- .err_handler = &cxl_error_handlers,
+ .err_handler = &pci_error_handlers,
.dev_groups = cxl_rcd_groups,
.driver = {
.probe_type = PROBE_PREFER_ASYNCHRONOUS,
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index 30a4bdb88c31..8e2b70e73582 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -65,6 +65,11 @@ static const struct cxl_error_handlers cxl_port_error_handlers = {
.cor_error_detected = cxl_port_cor_error_detected,
};
+const struct cxl_error_handlers cxl_ep_error_handlers = {
+ .error_detected = cxl_error_detected,
+ .cor_error_detected = cxl_cor_error_detected,
+};
+
static void cxl_assign_error_handlers(struct device *_dev,
const struct cxl_error_handlers *handlers)
{
@@ -203,6 +208,8 @@ static void cxl_endpoint_port_init_ras(struct cxl_port *port)
}
cxl_dport_init_ras_reporting(dport, cxlmd_dev);
+
+ cxl_assign_error_handlers(cxlmd_dev, &cxl_ep_error_handlers);
}
#else
--
2.34.1
Powered by blists - more mailing lists