[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <2a922e66-dd0a-4f51-918b-92e5b40293f5@amd.com>
Date: Mon, 6 Oct 2025 16:28:03 -0500
From: "Bowman, Terry" <terry.bowman@....com>
To: "Cheatham, Benjamin" <benjamin.cheatham@....com>
Cc: linux-kernel@...r.kernel.org, linux-pci@...r.kernel.org,
dave@...olabs.net, jonathan.cameron@...wei.com, dave.jiang@...el.com,
alison.schofield@...el.com, dan.j.williams@...el.com, bhelgaas@...gle.com,
shiju.jose@...wei.com, ming.li@...omail.com,
Smita.KoralahalliChannabasappa@....com, rrichter@....com,
dan.carpenter@...aro.org, PradeepVineshReddy.Kodamati@....com,
lukas@...ner.de, sathyanarayanan.kuppuswamy@...ux.intel.com,
linux-cxl@...r.kernel.org, alucerop@....com, ira.weiny@...el.com
Subject: Re: [PATCH v12 21/25] CXL/PCI: Introduce CXL Port protocol error
handlers
On 10/3/2025 3:12 PM, Cheatham, Benjamin wrote:
> On 9/25/2025 5:34 PM, Terry Bowman wrote:
>> Introduce CXL error handlers for CXL Port devices.
>>
>> Add functions cxl_port_cor_error_detected() and cxl_port_error_detected().
>> These will serve as the handlers for all CXL Port devices. Introduce
>> cxl_get_ras_base() to provide the RAS base address needed by the handlers.
>>
>> Update cxl_handle_proto_error() to call the CXL Port or CXL Endpoint
>> handler depending on which CXL device reports the error.
>>
>> Implement cxl_get_ras_base() to return the cached RAS register address of a
>> CXL Root Port, CXL Downstream Port, or CXL Upstream Port.
>>
>> Introduce get_pci_cxl_host_dev() to return the host responsible for
>> releasing the RAS mapped resources. CXL endpoints do not use a host to
>> manage its resources, allow for NULL in the case of an EP. Use reference
>> count increment on the host to prevent resource release. Make the caller
>> responsible for the reference decrement.
>>
>> Update the AER driver's is_cxl_error() PCI type check because CXL Port
>> devices are now supported.
>>
>> Signed-off-by: Terry Bowman <terry.bowman@....com>
>> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@...wei.com>
>> Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@...ux.intel.com>
>>
>> ---
>>
>> Changes in v11->v12:
>> - Add call to cxl_pci_drv_bound() in cxl_handle_proto_error() and
>> pci_to_cxl_dev()
>> - Change cxl_error_detected() -> cxl_cor_error_detected()
>> - Remove NULL variable assignments
>> - Replace bus_find_device() with find_cxl_port_by_uport() for upstream
>> port searches.
>>
>> Changes in v10->v11:
>> - None
>> ---
>> drivers/cxl/core/core.h | 10 +++
>> drivers/cxl/core/port.c | 7 +-
>> drivers/cxl/core/ras.c | 159 ++++++++++++++++++++++++++++++++--
>> drivers/pci/pcie/aer_cxl_vh.c | 5 +-
>> 4 files changed, 170 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
>> index 9ceff8acf844..3197a71bf7b8 100644
>> --- a/drivers/cxl/core/core.h
>> +++ b/drivers/cxl/core/core.h
>> @@ -156,6 +156,8 @@ pci_ers_result_t pci_error_detected(struct pci_dev *pdev,
>> void pci_cor_error_detected(struct pci_dev *pdev);
>> void cxl_cor_error_detected(struct device *dev);
>> pci_ers_result_t cxl_error_detected(struct device *dev);
>> +void cxl_port_cor_error_detected(struct device *dev);
>> +pci_ers_result_t cxl_port_error_detected(struct device *dev);
>> #else
>> static inline int cxl_ras_init(void)
>> {
>> @@ -180,9 +182,17 @@ static inline pci_ers_result_t cxl_error_detected(struct device *dev)
>> {
>> return PCI_ERS_RESULT_NONE;
>> }
>> +static inline void cxl_port_cor_error_detected(struct device *dev) { }
>> +static inline pci_ers_result_t cxl_port_error_detected(struct device *dev)
>> +{
>> + return PCI_ERS_RESULT_NONE;
> Same question as endpoint error handler on if this should be a PCI_ERS_RESULT_PANIC instead.
PCI_ERS_RESULT_NONE is correct. If CONFIG_CXL_RAS is disabled we don't want CXL RAS logic
running including the handling.
>> +}
>> #endif // CONFIG_CXL_RAS
> Wrong comment style.
Ok
>>
>> int cxl_gpf_port_setup(struct cxl_dport *dport);
>> +struct cxl_port *find_cxl_port(struct device *dport_dev,
>> + struct cxl_dport **dport);
>> +struct cxl_port *find_cxl_port_by_uport(struct device *uport_dev);
>>
>> struct cxl_hdm;
>> int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
>> diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
>> index 56fa4ac33e8b..f34a44abb2c9 100644
>> --- a/drivers/cxl/core/port.c
>> +++ b/drivers/cxl/core/port.c
>> @@ -1357,8 +1357,8 @@ static struct cxl_port *__find_cxl_port(struct cxl_find_port_ctx *ctx)
>> return NULL;
>> }
>>
>> -static struct cxl_port *find_cxl_port(struct device *dport_dev,
>> - struct cxl_dport **dport)
>> +struct cxl_port *find_cxl_port(struct device *dport_dev,
>> + struct cxl_dport **dport)
>> {
>> struct cxl_find_port_ctx ctx = {
>> .dport_dev = dport_dev,
>> @@ -1561,7 +1561,7 @@ static int match_port_by_uport(struct device *dev, const void *data)
>> * Function takes a device reference on the port device. Caller should do a
>> * put_device() when done.
>> */
>> -static struct cxl_port *find_cxl_port_by_uport(struct device *uport_dev)
>> +struct cxl_port *find_cxl_port_by_uport(struct device *uport_dev)
>> {
>> struct device *dev;
>>
>> @@ -1570,6 +1570,7 @@ static struct cxl_port *find_cxl_port_by_uport(struct device *uport_dev)
>> return to_cxl_port(dev);
>> return NULL;
>> }
>> +EXPORT_SYMBOL_NS_GPL(find_cxl_port_by_uport, "CXL");
>>
>> static int update_decoder_targets(struct device *dev, void *data)
>> {
>> diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
>> index 9acfe24ba3bb..7e8d63c32d72 100644
>> --- a/drivers/cxl/core/ras.c
>> +++ b/drivers/cxl/core/ras.c
>> @@ -250,6 +250,129 @@ static void cxl_dport_map_ras(struct cxl_dport *dport)
>> dev_dbg(dev, "Failed to map RAS capability.\n");
>> }
>>
>> +static void __iomem *cxl_get_ras_base(struct device *dev)
>> +{
>> + struct pci_dev *pdev = to_pci_dev(dev);
>> +
>> + switch (pci_pcie_type(pdev)) {
>> + case PCI_EXP_TYPE_ROOT_PORT:
>> + case PCI_EXP_TYPE_DOWNSTREAM:
>> + {
>> + struct cxl_dport *dport;
>> + struct cxl_port *port __free(put_cxl_port) = find_cxl_port(&pdev->dev, &dport);
>> +
>> + if (!dport || !dport->dport_dev) {
>> + pci_err(pdev, "Failed to find the CXL device");
>> + return NULL;
>> + }
>> +
>> + return dport->regs.ras;
>> + }
>> + case PCI_EXP_TYPE_UPSTREAM:
>> + {
>> + struct cxl_port *port __free(put_cxl_port) = find_cxl_port_by_uport(&pdev->dev);
>> +
>> + if (!port) {
>> + pci_err(pdev, "Failed to find the CXL device");
>> + return NULL;
>> + }
>> +
>> + return port->uport_regs.ras;
>> + }
>> + }
>> +
>> + dev_warn_once(dev, "Error: Unsupported device type (%X)", pci_pcie_type(pdev));
>> + return NULL;
>> +}
>> +
>> +static struct device *pci_to_cxl_dev(struct pci_dev *pdev)
>> +{
>> + switch (pci_pcie_type(pdev)) {
>> + case PCI_EXP_TYPE_ROOT_PORT:
>> + case PCI_EXP_TYPE_DOWNSTREAM:
>> + {
>> + struct cxl_dport *dport;
>> + struct cxl_port *port __free(put_cxl_port) = find_cxl_port(&pdev->dev, &dport);
>> +
>> + if (!port) {
>> + pci_err(pdev, "Failed to find the CXL device");
>> + return NULL;
>> + }
>> +
>> + return dport->dport_dev;
>> + }
>> + case PCI_EXP_TYPE_UPSTREAM:
>> + {
>> + struct cxl_port *port __free(put_cxl_port) = find_cxl_port_by_uport(&pdev->dev);
>> +
>> + if (!port) {
>> + pci_err(pdev, "Failed to find the CXL device");
>> + return NULL;
>> + }
>> +
>> + return port->uport_dev;
>> + }
>> + case PCI_EXP_TYPE_ENDPOINT:
>> + {
>> + struct cxl_dev_state *cxlds;
>> +
>> + if (!cxl_pci_drv_bound(pdev))
>> + return NULL;
>> +
>> + cxlds = pci_get_drvdata(pdev);
>> + return cxlds->dev;
>> + }
>> + }
>> +
>> + pci_warn_once(pdev, "Error: Unsupported device type (%X)", pci_pcie_type(pdev));
>> + return NULL;
>> +}
>> +
>> +/*
>> + * Return 'struct device *' responsible for freeing pdev's CXL resources.
>> + * Caller is responsible for reference count decrementing the return
>> + * 'struct device *'.
>> + *
>> + * dev: Find the host of this dev
>> + */
>> +static struct device *get_cxl_host_dev(struct device *dev)
>> +{
>> + struct pci_dev *pdev = to_pci_dev(dev);
>> +
>> + switch (pci_pcie_type(pdev)) {
>> + case PCI_EXP_TYPE_ROOT_PORT:
>> + case PCI_EXP_TYPE_DOWNSTREAM:
>> + {
>> + struct cxl_dport *dport;
>> + struct cxl_port *port __free(put_cxl_port) = find_cxl_port(&pdev->dev, &dport);
>> +
>> + if (!port) {
>> + pci_err(pdev, "Failed to find the CXL device");
>> + return NULL;
>> + }
>> +
>> + return &port->dev;
> I may just be tired, but won't the __free() action get called here unless you use no_free_ptr()?
> You do the same thing with cxl_get_ras_base() and pci_to_cxl_dev() above, though I think it's the
> intended behavior for the latter function.
This needs updating to not use __free. Thanks.
Terry
Powered by blists - more mailing lists