lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20210625112132.r7p7gqcyajpnnvjp@pali>
Date:   Fri, 25 Jun 2021 13:21:32 +0200
From:   Pali Rohár <pali@...nel.org>
To:     Lorenzo Pieralisi <lorenzo.pieralisi@....com>
Cc:     Bjorn Helgaas <bhelgaas@...gle.com>,
        Thomas Petazzoni <thomas.petazzoni@...tlin.com>,
        Marek Behún <kabel@...nel.org>,
        linux-pci@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [RESEND PATCH 2/3] PCI: aardvark: Fix checking for PIO status

On Friday 25 June 2021 12:04:29 Lorenzo Pieralisi wrote:
> On Thu, Jun 24, 2021 at 11:33:44PM +0200, Pali Rohár wrote:
> 
> [...]
> 
> > -static void advk_pcie_check_pio_status(struct advk_pcie *pcie)
> > +static int advk_pcie_check_pio_status(struct advk_pcie *pcie, u32 *val)
> >  {
> >  	struct device *dev = &pcie->pdev->dev;
> >  	u32 reg;
> > @@ -472,15 +476,50 @@ static void advk_pcie_check_pio_status(struct advk_pcie *pcie)
> >  	status = (reg & PIO_COMPLETION_STATUS_MASK) >>
> >  		PIO_COMPLETION_STATUS_SHIFT;
> >  
> > -	if (!status)
> > -		return;
> > -
> > +	/*
> > +	 * According to HW spec, the PIO status check sequence as below:
> > +	 * 1) even if COMPLETION_STATUS(bit9:7) indicates successful,
> > +	 *    it still needs to check Error Status(bit11), only when this bit
> > +	 *    indicates no error happen, the operation is successful.
> > +	 * 2) value Unsupported Request(1) of COMPLETION_STATUS(bit9:7) only
> > +	 *    means a PIO write error, and for PIO read it is successful with
> > +	 *    a read value of 0xFFFFFFFF.
> > +	 * 3) value Completion Retry Status(CRS) of COMPLETION_STATUS(bit9:7)
> > +	 *    only means a PIO write error, and for PIO read it is successful
> > +	 *    with a read value of 0xFFFF0001.
> > +	 * 4) value Completer Abort (CA) of COMPLETION_STATUS(bit9:7) means
> > +	 *    error for both PIO read and PIO write operation.
> > +	 * 5) other errors are indicated as 'unknown'.
> > +	 */
> >  	switch (status) {
> > +	case PIO_COMPLETION_STATUS_OK:
> > +		if (reg & PIO_ERR_STATUS) {
> > +			strcomp_status = "COMP_ERR";
> > +			break;
> > +		}
> > +		/* Get the read result */
> > +		if (val)
> > +			*val = advk_readl(pcie, PIO_RD_DATA);
> > +		/* No error */
> > +		strcomp_status = NULL;
> > +		break;
> >  	case PIO_COMPLETION_STATUS_UR:
> > -		strcomp_status = "UR";
> > +		if (val) {
> > +			/* For reading, UR is not an error status */
> > +			*val = CFG_RD_UR_VAL;
> > +			strcomp_status = NULL;
> > +		} else {
> > +			strcomp_status = "UR";
> > +		}
> >  		break;
> >  	case PIO_COMPLETION_STATUS_CRS:
> > -		strcomp_status = "CRS";
> > +		if (val) {
> > +			/* For reading, CRS is not an error status */
> > +			*val = CFG_RD_CRS_VAL;
> 
> Need Bjorn's input on this.

Ok.

> I don't think this is what is expected
> from a root complex according to the PCI specifications (depending on
> whether CRS software visibility is supported or not).

This patch / logic was written and reviewed by Marvell people, as
mentioned in the commit description. But I was not able to get any
feedback from them about aardvark, so I have not added them to the
recipients of this patch...

> Here we are fabricating a CRS completion value for all PCI config read
> transactions that are hitting a CRS completion status (and that's not
> the expected behaviour according to the PCI specifications and I don't
> think that's correct).

I see what you mean. I think that for a PCI_VENDOR_ID read request it
is correct. But the question is what we should return for other read
requests.

> > +			strcomp_status = NULL;
> > +		} else {
> > +			strcomp_status = "CRS";
> > +		}
> >  		break;
> >  	case PIO_COMPLETION_STATUS_CA:
> >  		strcomp_status = "CA";
> > @@ -490,6 +529,9 @@ static void advk_pcie_check_pio_status(struct advk_pcie *pcie)
> >  		break;
> >  	}
> >  
> > +	if (!strcomp_status)
> > +		return 0;
> > +
> >  	if (reg & PIO_NON_POSTED_REQ)
> >  		str_posted = "Non-posted";
> >  	else
> > @@ -497,6 +539,8 @@ static void advk_pcie_check_pio_status(struct advk_pcie *pcie)
> >  
> >  	dev_err(dev, "%s PIO Response Status: %s, %#x @ %#x\n",
> >  		str_posted, strcomp_status, reg, advk_readl(pcie, PIO_ADDR_LS));
> > +
> > +	return -EFAULT;
> >  }
> >  
> >  static int advk_pcie_wait_pio(struct advk_pcie *pcie)
> > @@ -703,8 +747,17 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn,
> >  						 size, val);
> >  
> >  	if (advk_pcie_pio_is_running(pcie)) {
> > -		*val = 0xffffffff;
> > -		return PCIBIOS_SET_FAILED;
> > +		/*
> > +		 * For PCI_VENDOR_ID register, return Completion Retry Status
> > +		 * so caller tries to issue the request again instead of failing
> > +		 */
> > +		if (where == PCI_VENDOR_ID) {
> > +			*val = CFG_RD_CRS_VAL;
> > +			return PCIBIOS_SUCCESSFUL;
> 
> Mmmm..here we are faking a CRS completion value to coerce the kernel
> into believing a CRS completion was received (which is not necessarily
> true) ?

This part of the patch was written by me. I chose to return a "fake CRS"
to let the kernel / software issue a new PCI_VENDOR_ID read request again
after a timeout. After some time the previous PIO transfer should complete,
and therefore advk_pcie_pio_is_running() returns false.

> if advk_pcie_pio_is_running(pcie) == true, is that an HW error ?

No. It indicates that software (the kernel) was impatient with a previous
config read / write request and did not wait for its completion. So
at the time when the kernel tried to issue a new (this) config read
request, the previous one was still running (advk_pcie_pio_is_running()
returned true) and therefore the driver was not able to issue a new config
read request.

In patch 3/3 I increased the wait timeout, so the situation where
advk_pcie_pio_is_running() returns true should not happen. Or, more
precisely, I was not able to reproduce it anymore.

> Lorenzo
> 
> > +		} else {
> > +			*val = 0xffffffff;
> > +			return PCIBIOS_SET_FAILED;
> > +		}
> >  	}
> >  
> >  	/* Program the control register */
> > @@ -729,15 +782,27 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn,
> >  	advk_writel(pcie, 1, PIO_START);
> >  
> >  	ret = advk_pcie_wait_pio(pcie);
> > +	if (ret < 0) {
> > +		/*
> > +		 * For PCI_VENDOR_ID register, return Completion Retry Status
> > +		 * so caller tries to issue the request again instead of failing
> > +		 */
> > +		if (where == PCI_VENDOR_ID) {
> > +			*val = CFG_RD_CRS_VAL;
> > +			return PCIBIOS_SUCCESSFUL;
> > +		} else {
> > +			*val = 0xffffffff;
> > +			return PCIBIOS_SET_FAILED;
> > +		}
> > +	}
> > +
> > +	/* Check PIO status and get the read result */
> > +	ret = advk_pcie_check_pio_status(pcie, val);
> >  	if (ret < 0) {
> >  		*val = 0xffffffff;
> >  		return PCIBIOS_SET_FAILED;
> >  	}
> >  
> > -	advk_pcie_check_pio_status(pcie);
> > -
> > -	/* Get the read result */
> > -	*val = advk_readl(pcie, PIO_RD_DATA);
> >  	if (size == 1)
> >  		*val = (*val >> (8 * (where & 3))) & 0xff;
> >  	else if (size == 2)
> > @@ -801,7 +866,9 @@ static int advk_pcie_wr_conf(struct pci_bus *bus, u32 devfn,
> >  	if (ret < 0)
> >  		return PCIBIOS_SET_FAILED;
> >  
> > -	advk_pcie_check_pio_status(pcie);
> > +	ret = advk_pcie_check_pio_status(pcie, NULL);
> > +	if (ret < 0)
> > +		return PCIBIOS_SET_FAILED;
> >  
> >  	return PCIBIOS_SUCCESSFUL;
> >  }
> > -- 
> > 2.20.1
> > 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ