lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250224125153.13728-1-rengarajan.s@microchip.com>
Date: Mon, 24 Feb 2025 18:21:53 +0530
From: Rengarajan S <rengarajan.s@...rochip.com>
To: <unglinuxdriver@...rochip.com>, <broonie@...nel.org>,
	<linux-spi@...r.kernel.org>, <linux-kernel@...r.kernel.org>
CC: <rengarajan.s@...rochip.com>
Subject: [PATCH v1 for-next] spi: mchp-pci1xxxx: Updated memcpy implementation for x64 and bcm2711 processors

On Raspberry Pi CM4 devices with the BCM2711 processor, the documentation
points to a limitation with 64-bit accesses. Using memcpy_fromio() and
memcpy_toio() for a 64-bit SPI read/write causes the first 4 bytes to be
repeated. To work around the limitation, each read/write is limited to 4
bytes on BCM2711 processors.

On x64 systems, using memcpy_toio() and memcpy_fromio() results in 4-byte
TLPs instead of 8-byte ones. Add custom IO write and read helpers that
enable 64-bit accesses by default.

Tested and verified performance improvement on x64 devices while
transferring 1024 bytes for 20000 iterations at 25 MHz clock frequency:

Test 1: With memcpy_fromio and memcpy_toio
spi mode: 0x0
bits per word: 8
max speed: 25000000 Hz (25000 kHz)
rate: tx 6232.5kbps, rx 6232.5kbps
rate: tx 6889.5kbps, rx 6889.5kbps
rate: tx 6765.0kbps, rx 6765.0kbps
rate: tx 6873.1kbps, rx 6873.1kbps
total: tx 20000.0KB, rx 20000.0KB

Test 2: With the custom IO write and read
spi mode: 0x0
bits per word: 8
max speed: 25000000 Hz (25000 kHz)
rate: tx 9774.7kbps, rx 9774.7kbps
rate: tx 10985.5kbps, rx 10985.5kbps
rate: tx 10749.5kbps, rx 10749.5kbps
total: tx 20000.0KB, rx 20000.0KB

Signed-off-by: Rengarajan S <rengarajan.s@...rochip.com>
---
 drivers/spi/spi-pci1xxxx.c | 95 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 91 insertions(+), 4 deletions(-)

diff --git a/drivers/spi/spi-pci1xxxx.c b/drivers/spi/spi-pci1xxxx.c
index fc98979eba48..ae1d76f03268 100644
--- a/drivers/spi/spi-pci1xxxx.c
+++ b/drivers/spi/spi-pci1xxxx.c
@@ -12,6 +12,7 @@
 #include <linux/irq.h>
 #include <linux/module.h>
 #include <linux/msi.h>
+#include <linux/of.h>
 #include <linux/pci_regs.h>
 #include <linux/pci.h>
 #include <linux/spinlock.h>
@@ -407,6 +408,68 @@ static void pci1xxxx_start_spi_xfer(struct pci1xxxx_spi_internal *p, u8 hw_inst)
 	writel(regval, p->parent->reg_base + SPI_MST_CTL_REG_OFFSET(hw_inst));
 }
 
+/*
+ * Copy @count bytes from @from to the MMIO region @to using the widest raw
+ * access that is naturally aligned on @to, fits in the remaining @count and
+ * does not exceed @size bytes (callers pass 4 on BCM2711 and 8 otherwise).
+ */
+static void pci1xxxx_spi_write_to_io(void __iomem *to, const void *from,
+				     size_t count, size_t size)
+{
+	size_t step;
+
+	for (; count; from += step, to += step, count -= step) {
+#ifdef CONFIG_64BIT	/* __raw_writeq() is not provided on 32-bit builds */
+		if (size == 8 && count >= 8 && IS_ALIGNED((unsigned long)to, 8)) {
+			__raw_writeq(*(const u64 *)from, to);
+			step = 8;
+			continue;
+		}
+#endif
+		if (size >= 4 && count >= 4 && IS_ALIGNED((unsigned long)to, 4)) {
+			__raw_writel(*(const u32 *)from, to);
+			step = 4;
+		} else if (size >= 2 && count >= 2 && IS_ALIGNED((unsigned long)to, 2)) {
+			__raw_writew(*(const u16 *)from, to);
+			step = 2;
+		} else {
+			__raw_writeb(*(const u8 *)from, to);
+			step = 1;
+		}
+	}
+}
+
+/*
+ * Copy @count bytes from the MMIO region @from to @to using the widest raw
+ * access that is naturally aligned on @from, fits in the remaining @count
+ * and does not exceed @size bytes (callers pass 4 on BCM2711, 8 otherwise).
+ */
+static void pci1xxxx_spi_read_from_io(void *to, const void __iomem *from,
+				      size_t count, size_t size)
+{
+	size_t step;
+
+	for (; count; from += step, to += step, count -= step) {
+#ifdef CONFIG_64BIT	/* __raw_readq() is not provided on 32-bit builds */
+		if (size == 8 && count >= 8 && IS_ALIGNED((unsigned long)from, 8)) {
+			*(u64 *)to = __raw_readq(from);
+			step = 8;
+			continue;
+		}
+#endif
+		if (size >= 4 && count >= 4 && IS_ALIGNED((unsigned long)from, 4)) {
+			*(u32 *)to = __raw_readl(from);
+			step = 4;
+		} else if (size >= 2 && count >= 2 && IS_ALIGNED((unsigned long)from, 2)) {
+			*(u16 *)to = __raw_readw(from);
+			step = 2;
+		} else {
+			*(u8 *)to = __raw_readb(from);
+			step = 1;
+		}
+	}
+}
+
 static int pci1xxxx_spi_transfer_with_io(struct spi_controller *spi_ctlr,
 					 struct spi_device *spi, struct spi_transfer *xfer)
 {
@@ -444,8 +507,23 @@ static int pci1xxxx_spi_transfer_with_io(struct spi_controller *spi_ctlr,
 				len = transfer_len % SPI_MAX_DATA_LEN;
 
 			reinit_completion(&p->spi_xfer_done);
-			memcpy_toio(par->reg_base + SPI_MST_CMD_BUF_OFFSET(p->hw_inst),
-				    &tx_buf[bytes_transfered], len);
+			/*
+			 * Raspberry Pi CM4 BCM2711 doesn't support 64-bit
+			 * accesses.
+			 */
+			if (of_machine_is_compatible("brcm,bcm2711")) {
+				pci1xxxx_spi_write_to_io(par->reg_base +
+							 SPI_MST_CMD_BUF_OFFSET
+							 (p->hw_inst),
+							 &tx_buf[bytes_transfered],
+							 len, 4);
+			} else {
+				pci1xxxx_spi_write_to_io(par->reg_base +
+							 SPI_MST_CMD_BUF_OFFSET
+							 (p->hw_inst),
+							 &tx_buf[bytes_transfered],
+							 len, 8);
+			}
 			bytes_transfered += len;
 			pci1xxxx_spi_setup(par, p->hw_inst, spi->mode, clkdiv, len);
 			pci1xxxx_start_spi_xfer(p, p->hw_inst);
@@ -457,8 +535,17 @@ static int pci1xxxx_spi_transfer_with_io(struct spi_controller *spi_ctlr,
 				return -ETIMEDOUT;
 
 			if (rx_buf) {
-				memcpy_fromio(&rx_buf[bytes_recvd], par->reg_base +
-					      SPI_MST_RSP_BUF_OFFSET(p->hw_inst), len);
+				if (of_machine_is_compatible("brcm,bcm2711")) {
+					pci1xxxx_spi_read_from_io(&rx_buf[bytes_recvd],
+								  par->reg_base +
+								  SPI_MST_RSP_BUF_OFFSET
+								  (p->hw_inst), len, 4);
+				} else {
+					pci1xxxx_spi_read_from_io(&rx_buf[bytes_recvd],
+								  par->reg_base +
+								  SPI_MST_RSP_BUF_OFFSET
+								  (p->hw_inst), len, 8);
+				}
 				bytes_recvd += len;
 			}
 		}
-- 
2.25.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ