diff mbox series

[v9,4/9] spi: cadence: Add Marvell SDMA operations

Message ID 20240619141716.1785467-5-wsadowski@marvell.com (mailing list archive)
State Superseded
Headers show
Series Marvell HW overlay support for Cadence xSPI | expand

Commit Message

Witold Sadowski June 19, 2024, 2:17 p.m. UTC
In Marvell xSPI implementation any access to SDMA register will result
in 8 byte SPI data transfer. Reading less data(eg. 1B) will result in
losing remaining bytes. To avoid that read/write 8 bytes into temporary
buffer, and read/write whole temporary buffer into SDMA.

Signed-off-by: Witold Sadowski <wsadowski@marvell.com>
---
 drivers/spi/spi-cadence-xspi.c | 56 ++++++++++++++++++++++++++++++++--
 1 file changed, 53 insertions(+), 3 deletions(-)

Comments

Mark Brown June 24, 2024, 2:29 p.m. UTC | #1
On Wed, Jun 19, 2024 at 07:17:10AM -0700, Witold Sadowski wrote:

> +static void m_ioreadq(void __iomem  *addr, void *buf, int len)
> +{
> +	u64 tmp_buf;
> +
> +	while (len) {
> +		tmp_buf = readq(addr);
> +		memcpy(buf, &tmp_buf, len > 8 ? 8 : len);
> +		len = len > 8 ? len - 8 : 0;
> +		buf += 8;
> +	}
> +}

Wouldn't it be more efficient and readable to only do the memcpy() for
the trailing bytes and just do this memcpy() for the final word?
Witold Sadowski June 28, 2024, 1:45 p.m. UTC | #2
Hi Mark
> 
> > +static void m_ioreadq(void __iomem  *addr, void *buf, int len) {
> > +	u64 tmp_buf;
> > +
> > +	while (len) {
> > +		tmp_buf = readq(addr);
> > +		memcpy(buf, &tmp_buf, len > 8 ? 8 : len);
> > +		len = len > 8 ? len - 8 : 0;
> > +		buf += 8;
> > +	}
> > +}
> 
> Wouldn't it be more efficient and readable to only do the memcpy() for the
> trailing bytes and just do this memcpy() for the final word?

The whole problem is with SDMA end - addr in that case. If code will try to
Read it in non 64b mode, all remaining bits, will be lost.
For example - doing 1B read on that register, will return 1B, but SDMA will
transfer 8B, dropping remaining 7B.
I have tried memcpy approach, and it was not stable.

Regards
Witek
Mark Brown June 28, 2024, 1:52 p.m. UTC | #3
On Fri, Jun 28, 2024 at 01:45:13PM +0000, Witold Sadowski wrote:
> Hi Mark

> > > +	while (len) {
> > > +		tmp_buf = readq(addr);
> > > +		memcpy(buf, &tmp_buf, len > 8 ? 8 : len);
> > > +		len = len > 8 ? len - 8 : 0;
> > > +		buf += 8;
> > > +	}
> > > +}

> > Wouldn't it be more efficient and readable to only do the memcpy() for the
> > trailing bytes and just do this memcpy() for the final word?

> The whole problem is with SDMA end - addr in that case. If code will try to
> Read it in non 64b mode, all remaining bits, will be lost.
> For example - doing 1B read on that register, will return 1B, but SDMA will
> transfer 8B, dropping remaining 7B.
> I have tried memcpy approach, and it was not stable.

That's not what I'm suggesting, like I said above I'm suggesting to
*only* do the memcpy() for the trailing word.
diff mbox series

Patch

diff --git a/drivers/spi/spi-cadence-xspi.c b/drivers/spi/spi-cadence-xspi.c
index d0222284c507..c79f2a2931a8 100644
--- a/drivers/spi/spi-cadence-xspi.c
+++ b/drivers/spi/spi-cadence-xspi.c
@@ -310,6 +310,7 @@  struct cdns_xspi_dev {
 	u8 hw_num_banks;
 
 	const struct cdns_xspi_driver_data *driver_data;
+	void (*sdma_handler)(struct cdns_xspi_dev *cdns_xspi);
 };
 
 static void cdns_xspi_reset_dll(struct cdns_xspi_dev *cdns_xspi)
@@ -515,6 +516,52 @@  static void cdns_xspi_sdma_handle(struct cdns_xspi_dev *cdns_xspi)
 	}
 }
 
+static void m_ioreadq(void __iomem  *addr, void *buf, int len)
+{
+	u64 tmp_buf;
+
+	while (len) {
+		tmp_buf = readq(addr);
+		memcpy(buf, &tmp_buf, len > 8 ? 8 : len);
+		len = len > 8 ? len - 8 : 0;
+		buf += 8;
+	}
+}
+
+static void m_iowriteq(void __iomem *addr, const void *buf, int len)
+{
+	u64 tmp_buf;
+
+	while (len) {
+		memcpy(&tmp_buf, buf, len > 8 ? 8 : len);
+		writeq(tmp_buf, addr);
+		len = len > 8 ? len - 8 : 0;
+		buf += 8;
+	}
+}
+
+static void marvell_xspi_sdma_handle(struct cdns_xspi_dev *cdns_xspi)
+{
+	u32 sdma_size, sdma_trd_info;
+	u8 sdma_dir;
+
+	sdma_size = readl(cdns_xspi->iobase + CDNS_XSPI_SDMA_SIZE_REG);
+	sdma_trd_info = readl(cdns_xspi->iobase + CDNS_XSPI_SDMA_TRD_INFO_REG);
+	sdma_dir = FIELD_GET(CDNS_XSPI_SDMA_DIR, sdma_trd_info);
+
+	switch (sdma_dir) {
+	case CDNS_XSPI_SDMA_DIR_READ:
+		m_ioreadq(cdns_xspi->sdmabase,
+			    cdns_xspi->in_buffer, sdma_size);
+		break;
+
+	case CDNS_XSPI_SDMA_DIR_WRITE:
+		m_iowriteq(cdns_xspi->sdmabase,
+			     cdns_xspi->out_buffer, sdma_size);
+		break;
+	}
+}
+
 static int cdns_xspi_send_stig_command(struct cdns_xspi_dev *cdns_xspi,
 				       const struct spi_mem_op *op,
 				       bool data_phase)
@@ -566,7 +613,7 @@  static int cdns_xspi_send_stig_command(struct cdns_xspi_dev *cdns_xspi,
 			cdns_xspi_set_interrupts(cdns_xspi, false);
 			return -EIO;
 		}
-		cdns_xspi_sdma_handle(cdns_xspi);
+		cdns_xspi->sdma_handler(cdns_xspi);
 	}
 
 	wait_for_completion(&cdns_xspi->cmd_complete);
@@ -736,10 +783,13 @@  static int cdns_xspi_probe(struct platform_device *pdev)
 	if (!cdns_xspi->driver_data)
 		return -ENODEV;
 
-	if (cdns_xspi->driver_data->mrvl_hw_overlay)
+	if (cdns_xspi->driver_data->mrvl_hw_overlay) {
 		host->mem_ops = &marvell_xspi_mem_ops;
-	else
+		cdns_xspi->sdma_handler = &marvell_xspi_sdma_handle;
+	} else {
 		host->mem_ops = &cadence_xspi_mem_ops;
+		cdns_xspi->sdma_handler = &cdns_xspi_sdma_handle;
+	}
 	host->dev.of_node = pdev->dev.of_node;
 	host->bus_num = -1;