diff mbox series

[v1,for-next] spi: mchp-pci1xxxx: Updated memcpy implementation for x64 and bcm2711 processors

Message ID 20250224125153.13728-1-rengarajan.s@microchip.com (mailing list archive)
State New
Headers show
Series [v1,for-next] spi: mchp-pci1xxxx: Updated memcpy implementation for x64 and bcm2711 processors | expand

Commit Message

Rengarajan S Feb. 24, 2025, 12:51 p.m. UTC
On Raspberry Pi CM4 devices with the BCM2711 processor, the documentation
points to a limitation with 64-bit accesses. Using memcpy_fromio and
memcpy_toio for each 64-bit SPI read/write causes the first 4 bytes to be
repeated. To work around the limitation, each read/write is limited to 4
bytes on BCM2711 processors.

On x64 systems, using memcpy_toio and memcpy_fromio results in 4-byte TLP
writes instead of 8-byte ones. Add custom IO write and read helpers that
use 64-bit accesses by default.

Tested and verified the performance improvement on x64 devices while
transferring 1024 bytes for 20000 iterations at a 25 MHz clock frequency:

Test 1: With memcpy_fromio and memcpy_toio
spi mode: 0x0
bits per word: 8
max speed: 25000000 Hz (25000 kHz)
rate: tx 6232.5kbps, rx 6232.5kbps
rate: tx 6889.5kbps, rx 6889.5kbps
rate: tx 6765.0kbps, rx 6765.0kbps
rate: tx 6873.1kbps, rx 6873.1kbps
total: tx 20000.0KB, rx 20000.0KB

Test 2: With the custom IO write and read
spi mode: 0x0
bits per word: 8
max speed: 25000000 Hz (25000 kHz)
rate: tx 9774.7kbps, rx 9774.7kbps
rate: tx 10985.5kbps, rx 10985.5kbps
rate: tx 10749.5kbps, rx 10749.5kbps
total: tx 20000.0KB, rx 20000.0KB

Signed-off-by: Rengarajan S <rengarajan.s@microchip.com>
---
 drivers/spi/spi-pci1xxxx.c | 95 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 91 insertions(+), 4 deletions(-)

Comments

Mark Brown Feb. 24, 2025, 2:30 p.m. UTC | #1
On Mon, Feb 24, 2025 at 06:21:53PM +0530, Rengarajan S wrote:
> In Raspberry-pi CM4 devices with BCM2711 processor, the documentation
> points to a limitation with 64-bit accesses. Using memcpy_fromio and
> memcpy_toio for each 64-bit SPI read/write causes the first 4 bytes to be
> repeated. To address the limitation, each read/write is limited to 4
> bytes in case of BCM2711 processors.

This feels like something we ought to be able to figure out from the PCI
subsystem rather than requiring us to enumerate specific SoCs, or at
least have PCI drivers be able to enumerate the system PCI quirk from
the PCI core.  What's the story with making this a per driver per SoC
thing - is there some reason it won't come up elsewhere?
diff mbox series

Patch

diff --git a/drivers/spi/spi-pci1xxxx.c b/drivers/spi/spi-pci1xxxx.c
index fc98979eba48..ae1d76f03268 100644
--- a/drivers/spi/spi-pci1xxxx.c
+++ b/drivers/spi/spi-pci1xxxx.c
@@ -12,6 +12,7 @@ 
 #include <linux/irq.h>
 #include <linux/module.h>
 #include <linux/msi.h>
+#include <linux/of.h>
 #include <linux/pci_regs.h>
 #include <linux/pci.h>
 #include <linux/spinlock.h>
@@ -407,6 +408,68 @@  static void pci1xxxx_start_spi_xfer(struct pci1xxxx_spi_internal *p, u8 hw_inst)
 	writel(regval, p->parent->reg_base + SPI_MST_CTL_REG_OFFSET(hw_inst));
 }
 
+static void pci1xxxx_spi_write_to_io(void __iomem *to, const void *from,
+				     size_t count, size_t size)
+{
+	while (count) {
+		if (size == 8 && (IS_ALIGNED((unsigned long)to, 8)) &&
+		    count >= 8) {
+			__raw_writeq(*(u64 *)from, to);
+			from += 8;
+			to += 8;
+			count -= 8;
+		} else if (size >= 4 && (IS_ALIGNED((unsigned long)to, 4)) &&
+			   count >= 4) {
+			__raw_writel(*(u32 *)from, to);
+			from += 4;
+			to += 4;
+			count -= 4;
+		} else if (size >= 2 && (IS_ALIGNED((unsigned long)to, 2)) &&
+			   count >= 2) {
+			__raw_writew(*(u16 *)from, to);
+			from += 2;
+			to += 2;
+			count -= 2;
+		} else {
+			__raw_writeb(*(u8 *)from, to);
+			from += 1;
+			to += 1;
+			count -= 1;
+		}
+	}
+}
+
+static void pci1xxxx_spi_read_from_io(void *to, const void __iomem *from,
+				      size_t count, size_t size)
+{
+	while (count) {
+		if (size == 8 && (IS_ALIGNED((unsigned long)from, 8)) &&
+		    count >= 8) {
+			*(u64 *)to = __raw_readq(from);
+			from += 8;
+			to += 8;
+			count -= 8;
+		} else if (size >= 4 && (IS_ALIGNED((unsigned long)from, 4)) &&
+			   count >= 4) {
+			*(u32 *)to = __raw_readl(from);
+			from += 4;
+			to += 4;
+			count -= 4;
+		} else if (size >= 2 && (IS_ALIGNED((unsigned long)from, 2)) &&
+			   count >= 2) {
+			*(u16 *)to = __raw_readw(from);
+			from += 2;
+			to += 2;
+			count -= 2;
+		} else {
+			*(u8 *)to = __raw_readb(from);
+			from += 1;
+			to += 1;
+			count -= 1;
+		}
+	}
+}
+
 static int pci1xxxx_spi_transfer_with_io(struct spi_controller *spi_ctlr,
 					 struct spi_device *spi, struct spi_transfer *xfer)
 {
@@ -444,8 +507,23 @@  static int pci1xxxx_spi_transfer_with_io(struct spi_controller *spi_ctlr,
 				len = transfer_len % SPI_MAX_DATA_LEN;
 
 			reinit_completion(&p->spi_xfer_done);
-			memcpy_toio(par->reg_base + SPI_MST_CMD_BUF_OFFSET(p->hw_inst),
-				    &tx_buf[bytes_transfered], len);
+			/*
+			 * Raspberry Pi CM4 BCM2711 doesn't support 64-bit
+			 * accesses.
+			 */
+			if (of_machine_is_compatible("brcm,bcm2711")) {
+				pci1xxxx_spi_write_to_io(par->reg_base +
+							 SPI_MST_CMD_BUF_OFFSET
+							 (p->hw_inst),
+							 &tx_buf[bytes_transfered],
+							 len, 4);
+			} else {
+				pci1xxxx_spi_write_to_io(par->reg_base +
+							 SPI_MST_CMD_BUF_OFFSET
+							 (p->hw_inst),
+							 &tx_buf[bytes_transfered],
+							 len, 8);
+			}
 			bytes_transfered += len;
 			pci1xxxx_spi_setup(par, p->hw_inst, spi->mode, clkdiv, len);
 			pci1xxxx_start_spi_xfer(p, p->hw_inst);
@@ -457,8 +535,17 @@  static int pci1xxxx_spi_transfer_with_io(struct spi_controller *spi_ctlr,
 				return -ETIMEDOUT;
 
 			if (rx_buf) {
-				memcpy_fromio(&rx_buf[bytes_recvd], par->reg_base +
-					      SPI_MST_RSP_BUF_OFFSET(p->hw_inst), len);
+				if (of_machine_is_compatible("brcm,bcm2711")) {
+					pci1xxxx_spi_read_from_io(&rx_buf[bytes_recvd],
+								  par->reg_base +
+								  SPI_MST_RSP_BUF_OFFSET
+								  (p->hw_inst), len, 4);
+				} else {
+					pci1xxxx_spi_read_from_io(&rx_buf[bytes_recvd],
+								  par->reg_base +
+								  SPI_MST_RSP_BUF_OFFSET
+								  (p->hw_inst), len, 8);
+				}
 				bytes_recvd += len;
 			}
 		}