[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230927154339.1600738-14-rrichter@amd.com>
Date: Wed, 27 Sep 2023 17:43:32 +0200
From: Robert Richter <rrichter@....com>
To: Alison Schofield <alison.schofield@...el.com>,
Vishal Verma <vishal.l.verma@...el.com>,
Ira Weiny <ira.weiny@...el.com>,
Ben Widawsky <bwidawsk@...nel.org>,
Dan Williams <dan.j.williams@...el.com>,
"Davidlohr Bueso" <dave@...olabs.net>,
Jonathan Cameron <jonathan.cameron@...wei.com>,
Dave Jiang <dave.jiang@...el.com>
CC: <linux-cxl@...r.kernel.org>, <linux-kernel@...r.kernel.org>,
Bjorn Helgaas <bhelgaas@...gle.com>,
Terry Bowman <terry.bowman@....com>,
Robert Richter <rrichter@....com>,
Jonathan Cameron <Jonathan.Cameron@...wei.com>
Subject: [PATCH v11 13/20] cxl/pci: Update CXL error logging to use RAS register address
From: Terry Bowman <terry.bowman@....com>
The CXL error handler currently only logs endpoint RAS status. The CXL
topology includes several components providing RAS details to be logged
during error handling.[1] Update the current handler's RAS logging to use a
RAS register address. Also, update the error handler function names to be
consistent with correctable and uncorrectable RAS. This will allow for
adding support to log other CXL component's RAS details in the future.
[1] CXL3.0 Table 8-22 CXL_Capability_ID Assignment
Co-developed-by: Robert Richter <rrichter@....com>
Signed-off-by: Terry Bowman <terry.bowman@....com>
Signed-off-by: Robert Richter <rrichter@....com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@...wei.com>
Reviewed-by: Dave Jiang <dave.jiang@...el.com>
---
drivers/cxl/core/pci.c | 44 +++++++++++++++++++++++++++++-------------
1 file changed, 31 insertions(+), 13 deletions(-)
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 4c6c5c7ba5a3..2b8883288539 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -646,32 +646,36 @@ void read_cdat_data(struct cxl_port *port)
}
EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);
-void cxl_cor_error_detected(struct pci_dev *pdev)
+static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds,
+ void __iomem *ras_base)
{
- struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
void __iomem *addr;
u32 status;
- if (!cxlds->regs.ras)
+ if (!ras_base)
return;
- addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
+ addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
status = readl(addr);
if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
trace_cxl_aer_correctable_error(cxlds->cxlmd, status);
}
}
-EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL);
+
+static void cxl_handle_endpoint_cor_ras(struct cxl_dev_state *cxlds)
+{
+ return __cxl_handle_cor_ras(cxlds, cxlds->regs.ras);
+}
/* CXL spec rev3.0 8.2.4.16.1 */
-static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log)
+static void header_log_copy(void __iomem *ras_base, u32 *log)
{
void __iomem *addr;
u32 *log_addr;
int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32);
- addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET;
+ addr = ras_base + CXL_RAS_HEADER_LOG_OFFSET;
log_addr = log;
for (i = 0; i < log_u32_size; i++) {
@@ -685,17 +689,18 @@ static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log)
* Log the state of the RAS status registers and prepare them to log the
* next error status. Return 1 if reset needed.
*/
-static bool cxl_report_and_clear(struct cxl_dev_state *cxlds)
+static bool __cxl_handle_ras(struct cxl_dev_state *cxlds,
+ void __iomem *ras_base)
{
u32 hl[CXL_HEADERLOG_SIZE_U32];
void __iomem *addr;
u32 status;
u32 fe;
- if (!cxlds->regs.ras)
+ if (!ras_base)
return false;
- addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
+ addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
status = readl(addr);
if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
return false;
@@ -703,7 +708,7 @@ static bool cxl_report_and_clear(struct cxl_dev_state *cxlds)
/* If multiple errors, log header points to first error from ctrl reg */
if (hweight32(status) > 1) {
void __iomem *rcc_addr =
- cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET;
+ ras_base + CXL_RAS_CAP_CONTROL_OFFSET;
fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
readl(rcc_addr)));
@@ -711,13 +716,18 @@ static bool cxl_report_and_clear(struct cxl_dev_state *cxlds)
fe = status;
}
- header_log_copy(cxlds, hl);
+ header_log_copy(ras_base, hl);
trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, hl);
writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
return true;
}
+static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds)
+{
+ return __cxl_handle_ras(cxlds, cxlds->regs.ras);
+}
+
#ifdef CONFIG_PCIEAER_CXL
void devm_cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport)
@@ -733,6 +743,14 @@ EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_parent_dport, CXL);
#endif
+void cxl_cor_error_detected(struct pci_dev *pdev)
+{
+ struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+
+ cxl_handle_endpoint_cor_ras(cxlds);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL);
+
pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
pci_channel_state_t state)
{
@@ -747,7 +765,7 @@ pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
* chance the situation is recoverable dump the status of the RAS
* capability registers and bounce the active state of the memdev.
*/
- ue = cxl_report_and_clear(cxlds);
+ ue = cxl_handle_endpoint_ras(cxlds);
switch (state) {
case pci_channel_io_normal:
--
2.30.2
Powered by blists - more mailing lists