lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <2a922e66-dd0a-4f51-918b-92e5b40293f5@amd.com>
Date: Mon, 6 Oct 2025 16:28:03 -0500
From: "Bowman, Terry" <terry.bowman@....com>
To: "Cheatham, Benjamin" <benjamin.cheatham@....com>
Cc: linux-kernel@...r.kernel.org, linux-pci@...r.kernel.org,
 dave@...olabs.net, jonathan.cameron@...wei.com, dave.jiang@...el.com,
 alison.schofield@...el.com, dan.j.williams@...el.com, bhelgaas@...gle.com,
 shiju.jose@...wei.com, ming.li@...omail.com,
 Smita.KoralahalliChannabasappa@....com, rrichter@....com,
 dan.carpenter@...aro.org, PradeepVineshReddy.Kodamati@....com,
 lukas@...ner.de, sathyanarayanan.kuppuswamy@...ux.intel.com,
 linux-cxl@...r.kernel.org, alucerop@....com, ira.weiny@...el.com
Subject: Re: [PATCH v12 21/25] CXL/PCI: Introduce CXL Port protocol error
 handlers



On 10/3/2025 3:12 PM, Cheatham, Benjamin wrote:
> On 9/25/2025 5:34 PM, Terry Bowman wrote:
>> Introduce CXL error handlers for CXL Port devices.
>>
>> Add functions cxl_port_cor_error_detected() and cxl_port_error_detected().
>> These will serve as the handlers for all CXL Port devices. Introduce
>> cxl_get_ras_base() to provide the RAS base address needed by the handlers.
>>
>> Update cxl_handle_proto_error() to call the CXL Port or CXL Endpoint
>> handler depending on which CXL device reports the error.
>>
>> Implement cxl_get_ras_base() to return the cached RAS register address of a
>> CXL Root Port, CXL Downstream Port, or CXL Upstream Port.
>>
>> Introduce get_pci_cxl_host_dev() to return the host responsible for
>> releasing the RAS mapped resources. CXL endpoints do not use a host to
>> manage its resources, allow for NULL in the case of an EP. Use reference
>> count increment on the host to prevent resource release. Make the caller
>> responsible for the reference decrement.
>>
>> Update the AER driver's is_cxl_error() PCI type check because CXL Port
>> devices are now supported.
>>
>> Signed-off-by: Terry Bowman <terry.bowman@....com>
>> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@...wei.com>
>> Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@...ux.intel.com>
>>
>> ---
>>
>> Changes in v11->v12:
>> - Add call to cxl_pci_drv_bound() in cxl_handle_proto_error() and
>>   pci_to_cxl_dev()
>> - Change cxl_error_detected() -> cxl_cor_error_detected()
>> - Remove NULL variable assignments
>> - Replace bus_find_device() with find_cxl_port_by_uport() for upstream
>>   port searches.
>>
>> Changes in v10->v11:
>> - None
>> ---
>>  drivers/cxl/core/core.h       |  10 +++
>>  drivers/cxl/core/port.c       |   7 +-
>>  drivers/cxl/core/ras.c        | 159 ++++++++++++++++++++++++++++++++--
>>  drivers/pci/pcie/aer_cxl_vh.c |   5 +-
>>  4 files changed, 170 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
>> index 9ceff8acf844..3197a71bf7b8 100644
>> --- a/drivers/cxl/core/core.h
>> +++ b/drivers/cxl/core/core.h
>> @@ -156,6 +156,8 @@ pci_ers_result_t pci_error_detected(struct pci_dev *pdev,
>>  void pci_cor_error_detected(struct pci_dev *pdev);
>>  void cxl_cor_error_detected(struct device *dev);
>>  pci_ers_result_t cxl_error_detected(struct device *dev);
>> +void cxl_port_cor_error_detected(struct device *dev);
>> +pci_ers_result_t cxl_port_error_detected(struct device *dev);
>>  #else
>>  static inline int cxl_ras_init(void)
>>  {
>> @@ -180,9 +182,17 @@ static inline pci_ers_result_t cxl_error_detected(struct device *dev)
>>  {
>>  	return PCI_ERS_RESULT_NONE;
>>  }
>> +static inline void cxl_port_cor_error_detected(struct device *dev) { }
>> +static inline pci_ers_result_t cxl_port_error_detected(struct device *dev)
>> +{
>> +	return PCI_ERS_RESULT_NONE;
> Same question as endpoint error handler on if this should be a PCI_ERS_RESULT_PANIC instead.

PCI_ERS_RESULT_NONE is correct. If CONFIG_CXL_RAS is disabled we don't want CXL RAS logic
running including the handling.

>> +}
>>  #endif // CONFIG_CXL_RAS
> Wrong comment style.
Ok
>>  
>>  int cxl_gpf_port_setup(struct cxl_dport *dport);
>> +struct cxl_port *find_cxl_port(struct device *dport_dev,
>> +			       struct cxl_dport **dport);
>> +struct cxl_port *find_cxl_port_by_uport(struct device *uport_dev);
>>  
>>  struct cxl_hdm;
>>  int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
>> diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
>> index 56fa4ac33e8b..f34a44abb2c9 100644
>> --- a/drivers/cxl/core/port.c
>> +++ b/drivers/cxl/core/port.c
>> @@ -1357,8 +1357,8 @@ static struct cxl_port *__find_cxl_port(struct cxl_find_port_ctx *ctx)
>>  	return NULL;
>>  }
>>  
>> -static struct cxl_port *find_cxl_port(struct device *dport_dev,
>> -				      struct cxl_dport **dport)
>> +struct cxl_port *find_cxl_port(struct device *dport_dev,
>> +			       struct cxl_dport **dport)
>>  {
>>  	struct cxl_find_port_ctx ctx = {
>>  		.dport_dev = dport_dev,
>> @@ -1561,7 +1561,7 @@ static int match_port_by_uport(struct device *dev, const void *data)
>>   * Function takes a device reference on the port device. Caller should do a
>>   * put_device() when done.
>>   */
>> -static struct cxl_port *find_cxl_port_by_uport(struct device *uport_dev)
>> +struct cxl_port *find_cxl_port_by_uport(struct device *uport_dev)
>>  {
>>  	struct device *dev;
>>  
>> @@ -1570,6 +1570,7 @@ static struct cxl_port *find_cxl_port_by_uport(struct device *uport_dev)
>>  		return to_cxl_port(dev);
>>  	return NULL;
>>  }
>> +EXPORT_SYMBOL_NS_GPL(find_cxl_port_by_uport, "CXL");
>>  
>>  static int update_decoder_targets(struct device *dev, void *data)
>>  {
>> diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
>> index 9acfe24ba3bb..7e8d63c32d72 100644
>> --- a/drivers/cxl/core/ras.c
>> +++ b/drivers/cxl/core/ras.c
>> @@ -250,6 +250,129 @@ static void cxl_dport_map_ras(struct cxl_dport *dport)
>>  		dev_dbg(dev, "Failed to map RAS capability.\n");
>>  }
>>  
>> +static void __iomem *cxl_get_ras_base(struct device *dev)
>> +{
>> +	struct pci_dev *pdev = to_pci_dev(dev);
>> +
>> +	switch (pci_pcie_type(pdev)) {
>> +	case PCI_EXP_TYPE_ROOT_PORT:
>> +	case PCI_EXP_TYPE_DOWNSTREAM:
>> +	{
>> +		struct cxl_dport *dport;
>> +		struct cxl_port *port __free(put_cxl_port) = find_cxl_port(&pdev->dev, &dport);
>> +
>> +		if (!dport || !dport->dport_dev) {
>> +			pci_err(pdev, "Failed to find the CXL device");
>> +			return NULL;
>> +		}
>> +
>> +		return dport->regs.ras;
>> +	}
>> +	case PCI_EXP_TYPE_UPSTREAM:
>> +	{
>> +		struct cxl_port *port __free(put_cxl_port) = find_cxl_port_by_uport(&pdev->dev);
>> +
>> +		if (!port) {
>> +			pci_err(pdev, "Failed to find the CXL device");
>> +			return NULL;
>> +		}
>> +
>> +		return port->uport_regs.ras;
>> +	}
>> +	}
>> +
>> +	dev_warn_once(dev, "Error: Unsupported device type (%X)", pci_pcie_type(pdev));
>> +	return NULL;
>> +}
>> +
>> +static struct device *pci_to_cxl_dev(struct pci_dev *pdev)
>> +{
>> +	switch (pci_pcie_type(pdev)) {
>> +	case PCI_EXP_TYPE_ROOT_PORT:
>> +	case PCI_EXP_TYPE_DOWNSTREAM:
>> +	{
>> +		struct cxl_dport *dport;
>> +		struct cxl_port *port __free(put_cxl_port) = find_cxl_port(&pdev->dev, &dport);
>> +
>> +		if (!port) {
>> +			pci_err(pdev, "Failed to find the CXL device");
>> +			return NULL;
>> +		}
>> +
>> +		return dport->dport_dev;
>> +	}
>> +	case PCI_EXP_TYPE_UPSTREAM:
>> +	{
>> +		struct cxl_port *port __free(put_cxl_port) = find_cxl_port_by_uport(&pdev->dev);
>> +
>> +		if (!port) {
>> +			pci_err(pdev, "Failed to find the CXL device");
>> +			return NULL;
>> +		}
>> +
>> +		return port->uport_dev;
>> +	}
>> +	case PCI_EXP_TYPE_ENDPOINT:
>> +	{
>> +		struct cxl_dev_state *cxlds;
>> +
>> +		if (!cxl_pci_drv_bound(pdev))
>> +			return NULL;
>> +
>> +		cxlds = pci_get_drvdata(pdev);
>> +		return cxlds->dev;
>> +	}
>> +	}
>> +
>> +	pci_warn_once(pdev, "Error: Unsupported device type (%X)", pci_pcie_type(pdev));
>> +	return NULL;
>> +}
>> +
>> +/*
>> + * Return 'struct device *' responsible for freeing pdev's CXL resources.
>> + * Caller is responsible for reference count decrementing the return
>> + * 'struct device *'.
>> + *
>> + * dev: Find the host of this dev
>> + */
>> +static struct device *get_cxl_host_dev(struct device *dev)
>> +{
>> +	struct pci_dev *pdev = to_pci_dev(dev);
>> +
>> +	switch (pci_pcie_type(pdev)) {
>> +	case PCI_EXP_TYPE_ROOT_PORT:
>> +	case PCI_EXP_TYPE_DOWNSTREAM:
>> +	{
>> +		struct cxl_dport *dport;
>> +		struct cxl_port *port __free(put_cxl_port) = find_cxl_port(&pdev->dev, &dport);
>> +
>> +		if (!port) {
>> +			pci_err(pdev, "Failed to find the CXL device");
>> +			return NULL;
>> +		}
>> +
>> +		return &port->dev;
> I may just be tired, but won't the __free() action get called here unless you use no_free_ptr()?
> You do the same thing with cxl_get_ras_base() and pci_to_cxl_dev() above, though I think it's the
> intended behavior for the latter function.
This needs updating to not use __free. Thanks.

Terry


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ