[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4a4e496db4c442178bf4f9d14dab9927@huawei.com>
Date: Fri, 27 Jun 2025 12:27:59 +0000
From: Shiju Jose <shiju.jose@...wei.com>
To: Terry Bowman <terry.bowman@....com>, "dave@...olabs.net"
<dave@...olabs.net>, Jonathan Cameron <jonathan.cameron@...wei.com>,
"dave.jiang@...el.com" <dave.jiang@...el.com>, "alison.schofield@...el.com"
<alison.schofield@...el.com>, "dan.j.williams@...el.com"
<dan.j.williams@...el.com>, "bhelgaas@...gle.com" <bhelgaas@...gle.com>,
"ming.li@...omail.com" <ming.li@...omail.com>,
"Smita.KoralahalliChannabasappa@....com"
<Smita.KoralahalliChannabasappa@....com>, "rrichter@....com"
<rrichter@....com>, "dan.carpenter@...aro.org" <dan.carpenter@...aro.org>,
"PradeepVineshReddy.Kodamati@....com" <PradeepVineshReddy.Kodamati@....com>,
"lukas@...ner.de" <lukas@...ner.de>, "Benjamin.Cheatham@....com"
<Benjamin.Cheatham@....com>, "sathyanarayanan.kuppuswamy@...ux.intel.com"
<sathyanarayanan.kuppuswamy@...ux.intel.com>, "linux-cxl@...r.kernel.org"
<linux-cxl@...r.kernel.org>
CC: "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
"linux-pci@...r.kernel.org" <linux-pci@...r.kernel.org>
Subject: RE: [PATCH v10 14/17] cxl/pci: Introduce CXL Endpoint protocol error
handlers
>-----Original Message-----
>From: Terry Bowman <terry.bowman@....com>
>Sent: 26 June 2025 23:43
>To: dave@...olabs.net; Jonathan Cameron <jonathan.cameron@...wei.com>;
>dave.jiang@...el.com; alison.schofield@...el.com; dan.j.williams@...el.com;
>bhelgaas@...gle.com; Shiju Jose <shiju.jose@...wei.com>;
>ming.li@...omail.com; Smita.KoralahalliChannabasappa@....com;
>rrichter@....com; dan.carpenter@...aro.org;
>PradeepVineshReddy.Kodamati@....com; lukas@...ner.de;
>Benjamin.Cheatham@....com;
>sathyanarayanan.kuppuswamy@...ux.intel.com; terry.bowman@....com;
>linux-cxl@...r.kernel.org
>Cc: linux-kernel@...r.kernel.org; linux-pci@...r.kernel.org
>Subject: [PATCH v10 14/17] cxl/pci: Introduce CXL Endpoint protocol error
>handlers
>
>CXL Endpoint protocol errors are currently handled using PCI error handlers. The
>CXL Endpoint requires CXL-specific handling of uncorrectable errors (UCE) that
>is not provided by the PCI handlers.
>
>Add CXL specific handlers for CXL Endpoints. Rename the existing
>cxl_error_handlers to be pci_error_handlers to more correctly indicate the
>error type and follow naming consistency.
>
>The PCI handlers will be called if the CXL device is not trained for alternate
>protocol (CXL). Update the CXL Endpoint PCI handlers to call the CXL UCE
>handlers.
>
>The existing EP UCE handler includes checks for various results. These are no
>longer needed because CXL UCE recovery will not be attempted. Implement
>cxl_handle_ras() to return PCI_ERS_RESULT_NONE or PCI_ERS_RESULT_PANIC.
>The CXL UCE handler is called by cxl_do_recovery() that acts on the return
>value. In the case of the PCI handler path, call panic() if the result is
>PCI_ERS_RESULT_PANIC.
>
>Signed-off-by: Terry Bowman <terry.bowman@....com>
>Reviewed-by: Kuppuswamy Sathyanarayanan
><sathyanarayanan.kuppuswamy@...ux.intel.com>
>---
> drivers/cxl/core/native_ras.c | 15 ++++---
> drivers/cxl/core/pci.c | 77 ++++++++++++++++++-----------------
> drivers/cxl/cxl.h | 4 ++
> drivers/cxl/cxlpci.h | 6 +--
> drivers/cxl/pci.c | 8 ++--
> 5 files changed, 59 insertions(+), 51 deletions(-)
>
[...]
>diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
>index 887b54cf3395..7209ffb5c2fe 100644
>--- a/drivers/cxl/core/pci.c
>+++ b/drivers/cxl/core/pci.c
>@@ -705,8 +705,8 @@ static void header_log_copy(void __iomem *ras_base, u32 *log)
> * Log the state of the RAS status registers and prepare them to log the
> * next error status. Return 1 if reset needed.
> */
>-static bool cxl_handle_ras(struct device *dev, u64 serial,
>- void __iomem *ras_base)
>+static pci_ers_result_t cxl_handle_ras(struct device *dev, u64 serial,
>+ void __iomem *ras_base)
> {
> u32 hl[CXL_HEADERLOG_SIZE_U32];
> void __iomem *addr;
>@@ -715,13 +715,13 @@ static bool cxl_handle_ras(struct device *dev, u64 serial,
>
> if (!ras_base) {
> dev_warn_once(dev, "CXL RAS register block is not mapped");
>- return false;
>+ return PCI_ERS_RESULT_NONE;
> }
>
> addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
> status = readl(addr);
> if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
>- return false;
>+ return PCI_ERS_RESULT_NONE;
>
> /* If multiple errors, log header points to first error from ctrl reg */
> if (hweight32(status) > 1) {
>@@ -738,7 +738,7 @@ static bool cxl_handle_ras(struct device *dev, u64 serial,
> trace_cxl_aer_uncorrectable_error(dev, serial, status, fe, hl);
> writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
>
>- return true;
>+ return PCI_ERS_RESULT_PANIC;
> }
>
> #ifdef CONFIG_PCIEAER_CXL
>@@ -833,13 +833,14 @@ static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
> static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { }
> #endif
>
>-void cxl_cor_error_detected(struct pci_dev *pdev)
>+void cxl_cor_error_detected(struct device *dev)
> {
>+ struct pci_dev *pdev = to_pci_dev(dev);
> struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
>- struct device *dev = &cxlds->cxlmd->dev;
>+ struct device *cxlmd_dev = &cxlds->cxlmd->dev;
>
>- scoped_guard(device, dev) {
>- if (!dev->driver) {
>+ scoped_guard(device, cxlmd_dev) {
>+ if (!cxlmd_dev->driver) {
> dev_warn(&pdev->dev,
> "%s: memdev disabled, abort error handling\n",
> dev_name(dev));
>@@ -854,20 +855,26 @@ void cxl_cor_error_detected(struct pci_dev *pdev)
> }
> EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL");
>
>-pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
>- pci_channel_state_t state)
>+void pci_cor_error_detected(struct pci_dev *pdev)
> {
>- struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
>- struct cxl_memdev *cxlmd = cxlds->cxlmd;
>- struct device *dev = &cxlmd->dev;
>- bool ue;
>+ cxl_cor_error_detected(&pdev->dev);
>+}
>+EXPORT_SYMBOL_NS_GPL(pci_cor_error_detected, "CXL");
>
>- scoped_guard(device, dev) {
>- if (!dev->driver) {
>+pci_ers_result_t cxl_error_detected(struct device *dev) {
>+ struct pci_dev *pdev = to_pci_dev(dev);
>+ struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
>+ struct device *cxlmd_dev = &cxlds->cxlmd->dev;
>+ pci_ers_result_t ue;
>+
>+ scoped_guard(device, cxlmd_dev) {
>+
Please remove the extra blank line above (the one added directly after the scoped_guard() line).
>+ if (!cxlmd_dev->driver) {
> dev_warn(&pdev->dev,
> "%s: memdev disabled, abort error handling\n",
> dev_name(dev));
Thanks,
Shiju
Powered by blists - more mailing lists