[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250626224252.1415009-5-terry.bowman@amd.com>
Date: Thu, 26 Jun 2025 17:42:39 -0500
From: Terry Bowman <terry.bowman@....com>
To: <dave@...olabs.net>, <jonathan.cameron@...wei.com>,
<dave.jiang@...el.com>, <alison.schofield@...el.com>,
<dan.j.williams@...el.com>, <bhelgaas@...gle.com>, <shiju.jose@...wei.com>,
<ming.li@...omail.com>, <Smita.KoralahalliChannabasappa@....com>,
<rrichter@....com>, <dan.carpenter@...aro.org>,
<PradeepVineshReddy.Kodamati@....com>, <lukas@...ner.de>,
<Benjamin.Cheatham@....com>, <sathyanarayanan.kuppuswamy@...ux.intel.com>,
<terry.bowman@....com>, <linux-cxl@...r.kernel.org>
CC: <linux-kernel@...r.kernel.org>, <linux-pci@...r.kernel.org>
Subject: [PATCH v10 04/17] CXL/AER: Introduce CXL specific AER driver file
The CXL AER error handling logic currently resides in the AER driver file,
drivers/pci/pcie/aer.c. CXL specific changes are conditionally compiled
using #ifdefs.
Improve the AER driver maintainability by separating the CXL specific logic
from the AER driver's core functionality and removing the #ifdefs.
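Introduce drivers/pci/pcie/cxl_aer.c and move the CXL AER logic into the
new file. While moving the code, replace the open-coded 'dev->class >> 8'
with FIELD_GET() using a new PCI_CLASS_CODE_MASK definition, and update
the CXL specification references in the comments from 3.0 to 3.2.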
Update the Makefile to conditionally compile the CXL file using the
existing CONFIG_PCIEAER_CXL Kconfig option.
Signed-off-by: Terry Bowman <terry.bowman@....com>
---
drivers/pci/pci.h | 8 +++
drivers/pci/pcie/Makefile | 1 +
drivers/pci/pcie/aer.c | 138 -------------------------------------
drivers/pci/pcie/cxl_aer.c | 138 +++++++++++++++++++++++++++++++++++++
include/linux/pci_ids.h | 2 +
5 files changed, 149 insertions(+), 138 deletions(-)
create mode 100644 drivers/pci/pcie/cxl_aer.c
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index a0d1e59b5666..91b583cf18eb 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -1029,6 +1029,14 @@ static inline void pci_save_aer_state(struct pci_dev *dev) { }
static inline void pci_restore_aer_state(struct pci_dev *dev) { }
#endif
+/*
+ * CXL RCH error handling helpers. Implemented in pcie/cxl_aer.c, which is
+ * only built with CONFIG_PCIEAER_CXL; otherwise the stubs below are no-ops.
+ */
+#ifdef CONFIG_PCIEAER_CXL
+void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info);
+void cxl_rch_enable_rcec(struct pci_dev *rcec);
+#else
+static inline void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info) { }
+static inline void cxl_rch_enable_rcec(struct pci_dev *rcec) { }
+#endif /* CONFIG_PCIEAER_CXL */
+
#ifdef CONFIG_ACPI
bool pci_acpi_preserve_config(struct pci_host_bridge *bridge);
int pci_acpi_program_hp_params(struct pci_dev *dev);
diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile
index 173829aa02e6..cd2cb925dbd5 100644
--- a/drivers/pci/pcie/Makefile
+++ b/drivers/pci/pcie/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o bwctrl.o
obj-y += aspm.o
obj-$(CONFIG_PCIEAER) += aer.o err.o tlp.o
+obj-$(CONFIG_PCIEAER_CXL) += cxl_aer.o
obj-$(CONFIG_PCIEAER_INJECT) += aer_inject.o
obj-$(CONFIG_PCIE_PME) += pme.o
obj-$(CONFIG_PCIE_DPC) += dpc.o
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index a2df9456595a..0b4d721980ef 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -1094,144 +1094,6 @@ static bool find_source_device(struct pci_dev *parent,
return true;
}
-#ifdef CONFIG_PCIEAER_CXL
-
-/**
- * pci_aer_unmask_internal_errors - unmask internal errors
- * @dev: pointer to the pci_dev data structure
- *
- * Unmask internal errors in the Uncorrectable and Correctable Error
- * Mask registers.
- *
- * Note: AER must be enabled and supported by the device which must be
- * checked in advance, e.g. with pcie_aer_is_native().
- */
-static void pci_aer_unmask_internal_errors(struct pci_dev *dev)
-{
- int aer = dev->aer_cap;
- u32 mask;
-
- pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &mask);
- mask &= ~PCI_ERR_UNC_INTN;
- pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, mask);
-
- pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, &mask);
- mask &= ~PCI_ERR_COR_INTERNAL;
- pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, mask);
-}
-
-static bool is_cxl_mem_dev(struct pci_dev *dev)
-{
- /*
- * The capability, status, and control fields in Device 0,
- * Function 0 DVSEC control the CXL functionality of the
- * entire device (CXL 3.0, 8.1.3).
- */
- if (dev->devfn != PCI_DEVFN(0, 0))
- return false;
-
- /*
- * CXL Memory Devices must have the 502h class code set (CXL
- * 3.0, 8.1.12.1).
- */
- if ((dev->class >> 8) != PCI_CLASS_MEMORY_CXL)
- return false;
-
- return true;
-}
-
-static bool cxl_error_is_native(struct pci_dev *dev)
-{
- struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
-
- return (pcie_ports_native || host->native_aer);
-}
-
-static bool is_internal_error(struct aer_err_info *info)
-{
- if (info->severity == AER_CORRECTABLE)
- return info->status & PCI_ERR_COR_INTERNAL;
-
- return info->status & PCI_ERR_UNC_INTN;
-}
-
-static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data)
-{
- struct aer_err_info *info = (struct aer_err_info *)data;
- const struct pci_error_handlers *err_handler;
-
- if (!is_cxl_mem_dev(dev) || !cxl_error_is_native(dev))
- return 0;
-
- /* Protect dev->driver */
- device_lock(&dev->dev);
-
- err_handler = dev->driver ? dev->driver->err_handler : NULL;
- if (!err_handler)
- goto out;
-
- if (info->severity == AER_CORRECTABLE) {
- if (err_handler->cor_error_detected)
- err_handler->cor_error_detected(dev);
- } else if (err_handler->error_detected) {
- if (info->severity == AER_NONFATAL)
- err_handler->error_detected(dev, pci_channel_io_normal);
- else if (info->severity == AER_FATAL)
- err_handler->error_detected(dev, pci_channel_io_frozen);
- }
-out:
- device_unlock(&dev->dev);
- return 0;
-}
-
-static void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info)
-{
- /*
- * Internal errors of an RCEC indicate an AER error in an
- * RCH's downstream port. Check and handle them in the CXL.mem
- * device driver.
- */
- if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC &&
- is_internal_error(info))
- pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info);
-}
-
-static int handles_cxl_error_iter(struct pci_dev *dev, void *data)
-{
- bool *handles_cxl = data;
-
- if (!*handles_cxl)
- *handles_cxl = is_cxl_mem_dev(dev) && cxl_error_is_native(dev);
-
- /* Non-zero terminates iteration */
- return *handles_cxl;
-}
-
-static bool handles_cxl_errors(struct pci_dev *rcec)
-{
- bool handles_cxl = false;
-
- if (pci_pcie_type(rcec) == PCI_EXP_TYPE_RC_EC &&
- pcie_aer_is_native(rcec))
- pcie_walk_rcec(rcec, handles_cxl_error_iter, &handles_cxl);
-
- return handles_cxl;
-}
-
-static void cxl_rch_enable_rcec(struct pci_dev *rcec)
-{
- if (!handles_cxl_errors(rcec))
- return;
-
- pci_aer_unmask_internal_errors(rcec);
- pci_info(rcec, "CXL: Internal errors unmasked");
-}
-
-#else
-static inline void cxl_rch_enable_rcec(struct pci_dev *dev) { }
-static inline void cxl_rch_handle_error(struct pci_dev *dev,
- struct aer_err_info *info) { }
-#endif
/**
* pci_aer_handle_error - handle logging error into an event log
diff --git a/drivers/pci/pcie/cxl_aer.c b/drivers/pci/pcie/cxl_aer.c
new file mode 100644
index 000000000000..b2ea14f70055
--- /dev/null
+++ b/drivers/pci/pcie/cxl_aer.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2025 AMD Corporation. All rights reserved. */
+
+#include <linux/pci.h>
+#include <linux/aer.h>
+#include <linux/bitfield.h>
+#include "../pci.h"
+
+/**
+ * pci_aer_unmask_internal_errors - enable reporting of AER internal errors
+ * @dev: PCI device whose AER mask registers are updated
+ *
+ * Clear the internal error bit in both the Uncorrectable Error Mask and
+ * the Correctable Error Mask registers of @dev's AER capability.
+ *
+ * Note: the caller must have checked in advance that AER is supported and
+ * enabled on @dev, e.g. with pcie_aer_is_native().
+ */
+static void pci_aer_unmask_internal_errors(struct pci_dev *dev)
+{
+	const int uncor_mask_reg = dev->aer_cap + PCI_ERR_UNCOR_MASK;
+	const int cor_mask_reg = dev->aer_cap + PCI_ERR_COR_MASK;
+	u32 val;
+
+	/* Uncorrectable internal errors */
+	pci_read_config_dword(dev, uncor_mask_reg, &val);
+	pci_write_config_dword(dev, uncor_mask_reg, val & ~PCI_ERR_UNC_INTN);
+
+	/* Correctable internal errors */
+	pci_read_config_dword(dev, cor_mask_reg, &val);
+	pci_write_config_dword(dev, cor_mask_reg, val & ~PCI_ERR_COR_INTERNAL);
+}
+
+/*
+ * Return true when @dev is function 0 of device 0 and carries the CXL
+ * memory device class code.
+ */
+static bool is_cxl_mem_dev(struct pci_dev *dev)
+{
+	/*
+	 * CXL 3.2, 8.1.3: the capability, status, and control fields in
+	 * the Device 0, Function 0 DVSEC control the CXL functionality of
+	 * the entire device, so no other function is considered.
+	 *
+	 * CXL 3.2, 8.1.12.1: CXL Memory Devices must have the 502h class
+	 * code set.
+	 */
+	return dev->devfn == PCI_DEVFN(0, 0) &&
+	       FIELD_GET(PCI_CLASS_CODE_MASK, dev->class) == PCI_CLASS_MEMORY_CXL;
+}
+
+/*
+ * Return true when pcie_ports_native is set or when the host bridge found
+ * for @dev's bus has native_aer set.
+ */
+static bool cxl_error_is_native(struct pci_dev *dev)
+{
+	struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus);
+
+	if (pcie_ports_native)
+		return true;
+
+	return bridge->native_aer;
+}
+
+/*
+ * Return true when the status recorded in @info has the internal error
+ * bit matching the recorded severity: the correctable bit for
+ * AER_CORRECTABLE, the uncorrectable bit for anything else.
+ */
+static bool is_internal_error(struct aer_err_info *info)
+{
+	u32 internal_bit = PCI_ERR_UNC_INTN;
+
+	if (info->severity == AER_CORRECTABLE)
+		internal_bit = PCI_ERR_COR_INTERNAL;
+
+	return info->status & internal_bit;
+}
+
+/*
+ * pcie_walk_rcec() callback: when @dev is a CXL memory device whose errors
+ * are handled natively, pass the error described by @data (a struct
+ * aer_err_info) to the error handlers of the driver bound to @dev.
+ * Always returns 0.
+ */
+static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data)
+{
+	const struct pci_error_handlers *handlers = NULL;
+	struct aer_err_info *err = data;
+
+	if (!is_cxl_mem_dev(dev))
+		return 0;
+	if (!cxl_error_is_native(dev))
+		return 0;
+
+	/* Hold the device lock to protect dev->driver */
+	device_lock(&dev->dev);
+
+	if (dev->driver)
+		handlers = dev->driver->err_handler;
+	if (!handlers)
+		goto unlock;
+
+	switch (err->severity) {
+	case AER_CORRECTABLE:
+		if (handlers->cor_error_detected)
+			handlers->cor_error_detected(dev);
+		break;
+	case AER_NONFATAL:
+		if (handlers->error_detected)
+			handlers->error_detected(dev, pci_channel_io_normal);
+		break;
+	case AER_FATAL:
+		if (handlers->error_detected)
+			handlers->error_detected(dev, pci_channel_io_frozen);
+		break;
+	default:
+		break;
+	}
+
+unlock:
+	device_unlock(&dev->dev);
+	return 0;
+}
+
+/*
+ * Forward an internal error reported by an RCEC to the CXL memory devices
+ * reached by pcie_walk_rcec(). Does nothing when @dev is not an RCEC or
+ * when @info does not describe an internal error.
+ */
+void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info)
+{
+	if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC)
+		return;
+
+	/*
+	 * Internal errors of an RCEC indicate an AER error in an RCH's
+	 * downstream port. Check and handle them in the CXL.mem device
+	 * driver.
+	 */
+	if (!is_internal_error(info))
+		return;
+
+	pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info);
+}
+
+/*
+ * pcie_walk_rcec() callback: record in the bool pointed to by @data whether
+ * @dev is a CXL memory device with natively handled errors. Returns the
+ * recorded flag; a non-zero return terminates the iteration.
+ */
+static int handles_cxl_error_iter(struct pci_dev *dev, void *data)
+{
+	bool *found = data;
+
+	/* A match was already recorded; stop the walk */
+	if (*found)
+		return 1;
+
+	*found = is_cxl_mem_dev(dev) && cxl_error_is_native(dev);
+	return *found;
+}
+
+/*
+ * Return true when @rcec is an RCEC with native AER and the walk over its
+ * devices finds at least one CXL memory device with natively handled
+ * errors.
+ */
+static bool handles_cxl_errors(struct pci_dev *rcec)
+{
+	bool found = false;
+
+	if (pci_pcie_type(rcec) != PCI_EXP_TYPE_RC_EC)
+		return false;
+	if (!pcie_aer_is_native(rcec))
+		return false;
+
+	pcie_walk_rcec(rcec, handles_cxl_error_iter, &found);
+	return found;
+}
+
+/**
+ * cxl_rch_enable_rcec - unmask internal errors on an RCEC handling CXL errors
+ * @rcec: pointer to the pci_dev of the candidate RCEC
+ *
+ * Does nothing unless handles_cxl_errors() accepts @rcec. Otherwise the
+ * internal error bits in the AER mask registers of @rcec are cleared and
+ * an informational message is logged.
+ */
+void cxl_rch_enable_rcec(struct pci_dev *rcec)
+{
+	if (!handles_cxl_errors(rcec))
+		return;
+
+	pci_aer_unmask_internal_errors(rcec);
+	/* Kernel log messages are terminated with a newline */
+	pci_info(rcec, "CXL: Internal errors unmasked\n");
+}
+
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index e2d71b6fdd84..31b3935bf189 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -12,6 +12,8 @@
/* Device classes and subclasses */
+/*
+ * Bits 23:8 of a device class value. Extracting this field yields a 16-bit
+ * code that can be compared with the PCI_CLASS_* values below.
+ */
+#define PCI_CLASS_CODE_MASK 0xFFFF00
+
#define PCI_CLASS_NOT_DEFINED 0x0000
#define PCI_CLASS_NOT_DEFINED_VGA 0x0001
--
2.34.1
Powered by blists - more mailing lists