lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20260115161226.00004845@huawei.com>
Date: Thu, 15 Jan 2026 16:12:26 +0000
From: Jonathan Cameron <jonathan.cameron@...wei.com>
To: Dave Jiang <dave.jiang@...el.com>
CC: Terry Bowman <terry.bowman@....com>, <dave@...olabs.net>,
	<alison.schofield@...el.com>, <dan.j.williams@...el.com>,
	<bhelgaas@...gle.com>, <shiju.jose@...wei.com>, <ming.li@...omail.com>,
	<Smita.KoralahalliChannabasappa@....com>, <rrichter@....com>,
	<dan.carpenter@...aro.org>, <PradeepVineshReddy.Kodamati@....com>,
	<lukas@...ner.de>, <Benjamin.Cheatham@....com>,
	<sathyanarayanan.kuppuswamy@...ux.intel.com>, <linux-cxl@...r.kernel.org>,
	<vishal.l.verma@...el.com>, <alucerop@....com>, <ira.weiny@...el.com>,
	<linux-kernel@...r.kernel.org>, <linux-pci@...r.kernel.org>
Subject: Re: [PATCH v14 31/34] PCI: Introduce CXL Port protocol error
 handlers


> > diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
> > index 0c640b84ad70..96ce85cc0a46 100644
> > --- a/drivers/cxl/core/ras.c
> > +++ b/drivers/cxl/core/ras.c

> > +
> > +static pci_ers_result_t cxl_port_error_detected(struct device *dev);
> > +
> > +static void cxl_do_recovery(struct pci_dev *pdev)
> > +{
> > +	struct cxl_port *port __free(put_cxl_port) = get_cxl_port(pdev);  
> To minimize errors, move this line to right above when you check !port. It's acceptable to do inline declaration when it comes cleanup macros. 
> 
> DJ
> > +	pci_ers_result_t status;
> > +
> > +	if (!port) {
> > +		pci_err(pdev, "Failed to find the CXL device\n");
> > +		return;
> > +	}
> > +
> > +	status = cxl_port_error_detected(&pdev->dev);
> > +	if (status == PCI_ERS_RESULT_PANIC)
> > +		panic("CXL cachemem error.");
> > +
> > +	/*
> > +	 * If we have native control of AER, clear error status in the device
> > +	 * that detected the error.  If the platform retained control of AER,
> > +	 * it is responsible for clearing this status.  In that case, the
> > +	 * signaling device may not even be visible to the OS.
> > +	 */
> > +	if (pcie_aer_is_native(pdev)) {
> > +		pcie_clear_device_status(pdev);
> > +		pci_aer_clear_nonfatal_status(pdev);
> > +		pci_aer_clear_fatal_status(pdev);
> > +	}
> > +}

> > +
> >  void cxl_cor_error_detected(struct pci_dev *pdev)
> >  {
> >  	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
> > @@ -346,6 +425,24 @@ EXPORT_SYMBOL_NS_GPL(cxl_error_detected, "CXL");
> >  
> >  static void cxl_handle_proto_error(struct cxl_proto_err_work_data *err_info)
> >  {
> > +	struct pci_dev *pdev = err_info->pdev;
> > +
> > +	if (err_info->severity == AER_CORRECTABLE) {
> > +
> > +		if (!pcie_aer_is_native(pdev))
> > +			return;
> > +
> > +		if (pdev->aer_cap)
> > +			pci_clear_and_set_config_dword(pdev,
> > +						       pdev->aer_cap + PCI_ERR_COR_STATUS,
> > +						       0, PCI_ERR_COR_INTERNAL);
> > +
> > +		cxl_port_cor_error_detected(&pdev->dev);
> > +
> > +		pcie_clear_device_status(pdev);
> > +	} else {
> > +		cxl_do_recovery(pdev);
> > +	}

Could flip logic to get out of here quickly in one case.

	if (err_info->severity != AER_CORRECTABLE) {
		cxl_do_recovery(pdev);
		return;
	}

	if (!pci...

just to reduce indent we don't need.  Up to you though.
> >  }
> >  
> >  static void cxl_proto_err_work_fn(struct work_struct *work)
> > diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> > index 13dbb405dc31..b7bfefdaf990 100644
> > --- a/drivers/pci/pci.c
> > +++ b/drivers/pci/pci.c
> > @@ -2248,6 +2248,7 @@ void pcie_clear_device_status(struct pci_dev *dev)
> >  	pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &sta);
> >  	pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta);
> >  }
> > +EXPORT_SYMBOL_GPL(pcie_clear_device_status);
To me it's a little odd that we restrict this to AER
given it's not in AER specific registers or anything like that.

It only happens to be used in that code right now so I guess
it is ok to do this anyway.

> >  #endif

> > diff --git a/drivers/pci/pcie/aer_cxl_vh.c b/drivers/pci/pcie/aer_cxl_vh.c
> > index 0f616f5fafcf..aa69e504302f 100644
> > --- a/drivers/pci/pcie/aer_cxl_vh.c
> > +++ b/drivers/pci/pcie/aer_cxl_vh.c
> > @@ -34,7 +34,10 @@ bool is_cxl_error(struct pci_dev *pdev, struct aer_err_info *info)
> >  	if (!info || !info->is_cxl)
> >  		return false;
> >  
> > -	if (pci_pcie_type(pdev) != PCI_EXP_TYPE_ENDPOINT)
> > +	if ((pci_pcie_type(pdev) != PCI_EXP_TYPE_ENDPOINT) &&
> > +	    (pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT) &&
> > +	    (pci_pcie_type(pdev) != PCI_EXP_TYPE_UPSTREAM) &&
> > +	    (pci_pcie_type(pdev) != PCI_EXP_TYPE_DOWNSTREAM))
> >  		return false;

Ah.  This fixes the earlier comment.  Maybe add a temp comment
or similar there to say you'll handle others later.
Also, maybe this is cleaner as a switch to avoid all those pci_pcie_type(pdev)
(or a local variable might also work).

	switch (pci_pcie_type(pdev)) {
	case PCI_EXP_TYPE_ENDPOINT:
	case PCI_EXP_TYPE_ROOT_PORT:
	case PCI_EXP_TYPE_UPSTREAM:
	case PCI_EXP_TYPE_DOWNSTREAM:
		return is_aer_internal_error(info);
	default:
		return false;
	}
> >  
> >  	return is_aer_internal_error(info);



Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ