lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aS3+5rw16xgMCuwE@lizhi-Precision-Tower-5810>
Date: Mon, 1 Dec 2025 15:47:34 -0500
From: Frank Li <Frank.li@....com>
To: Koichiro Den <den@...inux.co.jp>
Cc: ntb@...ts.linux.dev, linux-pci@...r.kernel.org,
	dmaengine@...r.kernel.org, linux-kernel@...r.kernel.org,
	mani@...nel.org, kwilczynski@...nel.org, kishon@...nel.org,
	bhelgaas@...gle.com, corbet@....net, vkoul@...nel.org,
	jdmason@...zu.us, dave.jiang@...el.com, allenbh@...il.com,
	Basavaraj.Natikar@....com, Shyam-sundar.S-k@....com,
	kurt.schwemmer@...rosemi.com, logang@...tatee.com,
	jingoohan1@...il.com, lpieralisi@...nel.org, robh@...nel.org,
	jbrunet@...libre.com, fancer.lancer@...il.com, arnd@...db.de,
	pstanner@...hat.com, elfring@...rs.sourceforge.net
Subject: Re: [RFC PATCH v2 23/27] NTB: epf: Add per-SoC quirk to cap MRRS for
 DWC eDMA (128B for R-Car)

On Sun, Nov 30, 2025 at 01:04:01AM +0900, Koichiro Den wrote:
> Some R-Car platforms using Synopsys DesignWare PCIe with the integrated
> eDMA exhibit reproducible payload corruption in RC->EP remote DMA read
> traffic whenever the endpoint issues 256-byte Memory Read (MRd) TLPs.
>
> The eDMA injects multiple MRd requests of size less than or equal to
> min(MRRS, MPS), so constraining the endpoint's MRd request size removes
> 256-byte MRd TLPs and avoids the issue. This change adds a per-SoC knob
> in the ntb_hw_epf driver and sets MRRS=128 on R-Car.
>
> We intentionally do not change the endpoint's MPS. Per the PCIe Base
> Specification, MPS limits the payload size of TLPs with data transmitted
> by the Function, while Max_Read_Request_Size limits the size of read
> requests produced by the Function as a Requester. Limiting MRRS is
> sufficient to constrain MRd Byte Count, while lowering MPS would also
> throttle unrelated traffic (e.g. endpoint-originated Posted Writes and
> Completions with Data) without being necessary for this fix.
>
> This quirk is scoped to the affected endpoint only and can be removed
> once the underlying issue is resolved in the controller/IP.
>
> Signed-off-by: Koichiro Den <den@...inux.co.jp>
> ---

Reviewed-by: Frank Li <Frank.Li@....com>

>  drivers/ntb/hw/epf/ntb_hw_epf.c | 66 +++++++++++++++++++++++++++++----
>  1 file changed, 58 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/ntb/hw/epf/ntb_hw_epf.c b/drivers/ntb/hw/epf/ntb_hw_epf.c
> index d9811da90599..21eb26b2f7cc 100644
> --- a/drivers/ntb/hw/epf/ntb_hw_epf.c
> +++ b/drivers/ntb/hw/epf/ntb_hw_epf.c
> @@ -51,6 +51,12 @@
>
>  #define NTB_EPF_COMMAND_TIMEOUT	1000 /* 1 Sec */
>
> +struct ntb_epf_soc_data {
> +	const enum pci_barno *barno_map;
> +	/* non-zero to override MRRS for this SoC */
> +	int force_mrrs;
> +};
> +
>  enum epf_ntb_bar {
>  	BAR_CONFIG,
>  	BAR_PEER_SPAD,
> @@ -594,11 +600,12 @@ static int ntb_epf_init_dev(struct ntb_epf_dev *ndev)
>  }
>
>  static int ntb_epf_init_pci(struct ntb_epf_dev *ndev,
> -			    struct pci_dev *pdev)
> +			    struct pci_dev *pdev,
> +			    const struct ntb_epf_soc_data *soc)
>  {
>  	struct device *dev = ndev->dev;
>  	size_t spad_sz, spad_off;
> -	int ret;
> +	int ret, cur;
>
>  	pci_set_drvdata(pdev, ndev);
>
> @@ -616,6 +623,17 @@ static int ntb_epf_init_pci(struct ntb_epf_dev *ndev,
>
>  	pci_set_master(pdev);
>
> +	if (soc && pci_is_pcie(pdev) && soc->force_mrrs) {
> +		cur = pcie_get_readrq(pdev);
> +		ret = pcie_set_readrq(pdev, soc->force_mrrs);
> +		if (ret)
> +			dev_warn(&pdev->dev, "failed to set MRRS=%d: %d\n",
> +				 soc->force_mrrs, ret);
> +		else
> +			dev_info(&pdev->dev, "capped MRRS: %d->%d for ntb-epf\n",
> +				 cur, soc->force_mrrs);
> +	}
> +
>  	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
>  	if (ret) {
>  		ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
> @@ -690,6 +708,7 @@ static void ntb_epf_cleanup_isr(struct ntb_epf_dev *ndev)
>  static int ntb_epf_pci_probe(struct pci_dev *pdev,
>  			     const struct pci_device_id *id)
>  {
> +	const struct ntb_epf_soc_data *soc = (const void *)id->driver_data;
>  	struct device *dev = &pdev->dev;
>  	struct ntb_epf_dev *ndev;
>  	int ret;
> @@ -701,16 +720,16 @@ static int ntb_epf_pci_probe(struct pci_dev *pdev,
>  	if (!ndev)
>  		return -ENOMEM;
>
> -	ndev->barno_map = (const enum pci_barno *)id->driver_data;
> -	if (!ndev->barno_map)
> +	if (!soc || !soc->barno_map)
>  		return -EINVAL;
>
> +	ndev->barno_map = soc->barno_map;
>  	ndev->dev = dev;
>
>  	ntb_epf_init_struct(ndev, pdev);
>  	mutex_init(&ndev->cmd_lock);
>
> -	ret = ntb_epf_init_pci(ndev, pdev);
> +	ret = ntb_epf_init_pci(ndev, pdev, soc);
>  	if (ret) {
>  		dev_err(dev, "Failed to init PCI\n");
>  		return ret;
> @@ -778,21 +797,52 @@ static const enum pci_barno rcar_barno[NTB_BAR_NUM] = {
>  	[BAR_MW4]	= NO_BAR,
>  };
>
> +static const struct ntb_epf_soc_data j721e_soc = {
> +	.barno_map = j721e_map,
> +};
> +
> +static const struct ntb_epf_soc_data mx8_soc = {
> +	.barno_map = mx8_map,
> +};
> +
> +static const struct ntb_epf_soc_data rcar_soc = {
> +	.barno_map = rcar_barno,
> +	/*
> +	 * On some R-Car platforms using the Synopsys DWC PCIe + eDMA we
> +	 * observe data corruption on RC->EP Remote DMA Read paths whenever
> +	 * the EP issues large MRd requests. The corruption consistently
> +	 * hits the tail of each 256-byte segment (e.g. offsets
> +	 * 0x00E0..0x00FF within a 256B block, and again at 0x01E0..0x01FF
> +	 * for larger transfers).
> +	 *
> +	 * The eDMA injects multiple MRd requests of size less than or equal
> +	 * to min(MRRS, MPS) into the outbound request path. By
> +	 * lowering MRRS to 128 we prevent 256B MRd TLPs from being
> +	 * generated and avoid the issue on the affected hardware. We
> +	 * intentionally keep MPS unchanged and scope this quirk to this
> +	 * endpoint to avoid impacting unrelated devices.
> +	 *
> +	 * Remove this once the issue is resolved (possibly at the
> +	 * controller/IP level) or a better workaround becomes available.
> +	 */
> +	.force_mrrs = 128,
> +};
> +
>  static const struct pci_device_id ntb_epf_pci_tbl[] = {
>  	{
>  		PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_J721E),
>  		.class = PCI_CLASS_MEMORY_RAM << 8, .class_mask = 0xffff00,
> -		.driver_data = (kernel_ulong_t)j721e_map,
> +		.driver_data = (kernel_ulong_t)&j721e_soc,
>  	},
>  	{
>  		PCI_DEVICE(PCI_VENDOR_ID_FREESCALE, 0x0809),
>  		.class = PCI_CLASS_MEMORY_RAM << 8, .class_mask = 0xffff00,
> -		.driver_data = (kernel_ulong_t)mx8_map,
> +		.driver_data = (kernel_ulong_t)&mx8_soc,
>  	},
>  	{
>  		PCI_DEVICE(PCI_VENDOR_ID_RENESAS, 0x0030),
>  		.class = PCI_CLASS_MEMORY_RAM << 8, .class_mask = 0xffff00,
> -		.driver_data = (kernel_ulong_t)rcar_barno,
> +		.driver_data = (kernel_ulong_t)&rcar_soc,
>  	},
>  	{ },
>  };
> --
> 2.48.1
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ