Message ID | 20241121115201.2191-2-quic_jseerapu@quicinc.com (mailing list archive) |
---|---|
State | Changes Requested |
Headers | show |
Series | Add immediate DMA support | expand |
On Thu, Nov 21, 2024 at 05:22:01PM +0530, Jyothi Kumar Seerapu wrote: > The DMA TRE(Transfer ring element) buffer contains the DMA > buffer address. Accessing data from this address can cause > significant delays in SPI transfers, which can be mitigated to > some extent by utilizing immediate DMA support. > > QCOM GPI DMA hardware supports an immediate DMA feature for data > up to 8 bytes, storing the data directly in the DMA TRE buffer > instead of the DMA buffer address. This enhancement enables faster > SPI data transfers. Is it supported on all GPI DMA platforms, starting from SDM845? > > This optimization reduces the average transfer time from 25 us to > 16 us for a single SPI transfer of 8 bytes length, with a clock > frequency of 50 MHz. > > Signed-off-by: Jyothi Kumar Seerapu <quic_jseerapu@quicinc.com> > --- > drivers/dma/qcom/gpi.c | 32 +++++++++++++++++++++++++++----- > drivers/spi/spi-geni-qcom.c | 7 +++++++ > include/linux/dma/qcom-gpi-dma.h | 7 +++++++ How is this supposed to be merged? Please try to separate the patches by the subsystem, letting maintainers to handle possible dependencies. 
> 3 files changed, 41 insertions(+), 5 deletions(-) > > diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c > index 52a7c8f2498f..a8df1e835e27 100644 > --- a/drivers/dma/qcom/gpi.c > +++ b/drivers/dma/qcom/gpi.c > @@ -27,6 +27,7 @@ > #define TRE_FLAGS_IEOT BIT(9) > #define TRE_FLAGS_BEI BIT(10) > #define TRE_FLAGS_LINK BIT(11) > +#define TRE_FLAGS_IMMEDIATE_DMA BIT(16) > #define TRE_FLAGS_TYPE GENMASK(23, 16) > > /* SPI CONFIG0 WD0 */ > @@ -64,6 +65,7 @@ > > /* DMA TRE */ > #define TRE_DMA_LEN GENMASK(23, 0) > +#define TRE_DMA_IMMEDIATE_LEN GENMASK(3, 0) > > /* Register offsets from gpi-top */ > #define GPII_n_CH_k_CNTXT_0_OFFS(n, k) (0x20000 + (0x4000 * (n)) + (0x80 * (k))) > @@ -1711,6 +1713,8 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc, > dma_addr_t address; > struct gpi_tre *tre; > unsigned int i; > + u8 *buf; > + int len = 0; > > /* first create config tre if applicable */ > if (direction == DMA_MEM_TO_DEV && spi->set_config) { > @@ -1763,14 +1767,32 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc, > tre_idx++; > > address = sg_dma_address(sgl); > - tre->dword[0] = lower_32_bits(address); > - tre->dword[1] = upper_32_bits(address); > + len = sg_dma_len(sgl); > > - tre->dword[2] = u32_encode_bits(sg_dma_len(sgl), TRE_DMA_LEN); > + /* Support Immediate dma for write transfers for data length up to 8 bytes */ > + if ((spi->flags & QCOM_GPI_IMMEDIATE_DMA) && direction == DMA_MEM_TO_DEV) { > + buf = (u8 *)sg_virt(sgl); > > - tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE); > - if (direction == DMA_MEM_TO_DEV) > + /* memcpy may not always be length of 8, hence pre-fill both dword's with 0 */ > + tre->dword[0] = 0; > + tre->dword[1] = 0; > + memcpy((u8 *)&tre->dword[0], buf, len); Drop all type conversions, they should not be necessary. memcpy() functions accepts void pointers. 
> + > + tre->dword[2] = u32_encode_bits(len, TRE_DMA_IMMEDIATE_LEN); > + > + tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE); > tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT); > + tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IMMEDIATE_DMA); > + } else { > + tre->dword[0] = lower_32_bits(address); > + tre->dword[1] = upper_32_bits(address); > + > + tre->dword[2] = u32_encode_bits(len, TRE_DMA_LEN); > + > + tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE); > + if (direction == DMA_MEM_TO_DEV) > + tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT); > + } > > for (i = 0; i < tre_idx; i++) > dev_dbg(dev, "TRE:%d %x:%x:%x:%x\n", i, desc->tre[i].dword[0], > diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c > index 768d7482102a..53c8f6b7f3c5 100644 > --- a/drivers/spi/spi-geni-qcom.c > +++ b/drivers/spi/spi-geni-qcom.c > @@ -472,11 +472,18 @@ static int setup_gsi_xfer(struct spi_transfer *xfer, struct spi_geni_master *mas > mas->cur_speed_hz = xfer->speed_hz; > } > > + /* > + * Set QCOM_GPI_IMMEDIATE_DMA flag if transfer length up to 8 bytes. > + */ > if (xfer->tx_buf && xfer->rx_buf) { > peripheral.cmd = SPI_DUPLEX; > + if (xfer->len <= QCOM_GPI_IMMEDIATE_DMA_LEN) > + peripheral.flags |= QCOM_GPI_IMMEDIATE_DMA; > } else if (xfer->tx_buf) { > peripheral.cmd = SPI_TX; > peripheral.rx_len = 0; > + if (xfer->len <= QCOM_GPI_IMMEDIATE_DMA_LEN) > + peripheral.flags |= QCOM_GPI_IMMEDIATE_DMA; > } else if (xfer->rx_buf) { > peripheral.cmd = SPI_RX; > if (!(mas->cur_bits_per_word % MIN_WORD_LEN)) { > diff --git a/include/linux/dma/qcom-gpi-dma.h b/include/linux/dma/qcom-gpi-dma.h > index 6680dd1a43c6..0eb96e62a1f1 100644 > --- a/include/linux/dma/qcom-gpi-dma.h > +++ b/include/linux/dma/qcom-gpi-dma.h > @@ -15,6 +15,11 @@ enum spi_transfer_cmd { > SPI_DUPLEX, > }; > > +#define QCOM_GPI_BLOCK_EVENT_IRQ BIT(0) Unrelated, please drop. 
> +#define QCOM_GPI_IMMEDIATE_DMA BIT(1) Can GPI driver deduce whether it should use immediate DMA based on the transfer length? > + > +#define QCOM_GPI_IMMEDIATE_DMA_LEN 8 > + > /** > * struct gpi_spi_config - spi config for peripheral > * > @@ -30,6 +35,7 @@ enum spi_transfer_cmd { > * @cs: chip select toggle > * @set_config: set peripheral config > * @rx_len: receive length for buffer > + * @flags: flags for immediate dma and block event interrupt support > */ > struct gpi_spi_config { > u8 set_config; > @@ -44,6 +50,7 @@ struct gpi_spi_config { > u32 clk_src; > enum spi_transfer_cmd cmd; > u32 rx_len; > + u8 flags; > }; > > enum i2c_op { > -- > 2.17.1 >
On 11/22/2024 3:38 AM, Dmitry Baryshkov wrote: > On Thu, Nov 21, 2024 at 05:22:01PM +0530, Jyothi Kumar Seerapu wrote: >> The DMA TRE(Transfer ring element) buffer contains the DMA >> buffer address. Accessing data from this address can cause >> significant delays in SPI transfers, which can be mitigated to >> some extent by utilizing immediate DMA support. >> >> QCOM GPI DMA hardware supports an immediate DMA feature for data >> up to 8 bytes, storing the data directly in the DMA TRE buffer >> instead of the DMA buffer address. This enhancement enables faster >> SPI data transfers. > > Is it supported on all GPI DMA platforms, starting from SDM845? Yes, it is supported on all GPI DMA platforms. > >> >> This optimization reduces the average transfer time from 25 us to >> 16 us for a single SPI transfer of 8 bytes length, with a clock >> frequency of 50 MHz. >> >> Signed-off-by: Jyothi Kumar Seerapu <quic_jseerapu@quicinc.com> >> --- >> drivers/dma/qcom/gpi.c | 32 +++++++++++++++++++++++++++----- >> drivers/spi/spi-geni-qcom.c | 7 +++++++ >> include/linux/dma/qcom-gpi-dma.h | 7 +++++++ > > How is this supposed to be merged? Please try to separate the patches by > the subsystem, letting maintainers to handle possible dependencies. > Sure, I have separated the patches in V2. 
>> 3 files changed, 41 insertions(+), 5 deletions(-) >> >> diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c >> index 52a7c8f2498f..a8df1e835e27 100644 >> --- a/drivers/dma/qcom/gpi.c >> +++ b/drivers/dma/qcom/gpi.c >> @@ -27,6 +27,7 @@ >> #define TRE_FLAGS_IEOT BIT(9) >> #define TRE_FLAGS_BEI BIT(10) >> #define TRE_FLAGS_LINK BIT(11) >> +#define TRE_FLAGS_IMMEDIATE_DMA BIT(16) >> #define TRE_FLAGS_TYPE GENMASK(23, 16) >> >> /* SPI CONFIG0 WD0 */ >> @@ -64,6 +65,7 @@ >> >> /* DMA TRE */ >> #define TRE_DMA_LEN GENMASK(23, 0) >> +#define TRE_DMA_IMMEDIATE_LEN GENMASK(3, 0) >> >> /* Register offsets from gpi-top */ >> #define GPII_n_CH_k_CNTXT_0_OFFS(n, k) (0x20000 + (0x4000 * (n)) + (0x80 * (k))) >> @@ -1711,6 +1713,8 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc, >> dma_addr_t address; >> struct gpi_tre *tre; >> unsigned int i; >> + u8 *buf; >> + int len = 0; >> >> /* first create config tre if applicable */ >> if (direction == DMA_MEM_TO_DEV && spi->set_config) { >> @@ -1763,14 +1767,32 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc, >> tre_idx++; >> >> address = sg_dma_address(sgl); >> - tre->dword[0] = lower_32_bits(address); >> - tre->dword[1] = upper_32_bits(address); >> + len = sg_dma_len(sgl); >> >> - tre->dword[2] = u32_encode_bits(sg_dma_len(sgl), TRE_DMA_LEN); >> + /* Support Immediate dma for write transfers for data length up to 8 bytes */ >> + if ((spi->flags & QCOM_GPI_IMMEDIATE_DMA) && direction == DMA_MEM_TO_DEV) { >> + buf = (u8 *)sg_virt(sgl); >> >> - tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE); >> - if (direction == DMA_MEM_TO_DEV) >> + /* memcpy may not always be length of 8, hence pre-fill both dword's with 0 */ >> + tre->dword[0] = 0; >> + tre->dword[1] = 0; >> + memcpy((u8 *)&tre->dword[0], buf, len); > > Drop all type conversions, they should not be necessary. memcpy() > functions accepts void pointers. 
Done > >> + >> + tre->dword[2] = u32_encode_bits(len, TRE_DMA_IMMEDIATE_LEN); >> + >> + tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE); >> tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT); >> + tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IMMEDIATE_DMA); >> + } else { >> + tre->dword[0] = lower_32_bits(address); >> + tre->dword[1] = upper_32_bits(address); >> + >> + tre->dword[2] = u32_encode_bits(len, TRE_DMA_LEN); >> + >> + tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE); >> + if (direction == DMA_MEM_TO_DEV) >> + tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT); >> + } >> >> for (i = 0; i < tre_idx; i++) >> dev_dbg(dev, "TRE:%d %x:%x:%x:%x\n", i, desc->tre[i].dword[0], >> diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c >> index 768d7482102a..53c8f6b7f3c5 100644 >> --- a/drivers/spi/spi-geni-qcom.c >> +++ b/drivers/spi/spi-geni-qcom.c >> @@ -472,11 +472,18 @@ static int setup_gsi_xfer(struct spi_transfer *xfer, struct spi_geni_master *mas >> mas->cur_speed_hz = xfer->speed_hz; >> } >> >> + /* >> + * Set QCOM_GPI_IMMEDIATE_DMA flag if transfer length up to 8 bytes. >> + */ >> if (xfer->tx_buf && xfer->rx_buf) { >> peripheral.cmd = SPI_DUPLEX; >> + if (xfer->len <= QCOM_GPI_IMMEDIATE_DMA_LEN) >> + peripheral.flags |= QCOM_GPI_IMMEDIATE_DMA; >> } else if (xfer->tx_buf) { >> peripheral.cmd = SPI_TX; >> peripheral.rx_len = 0; >> + if (xfer->len <= QCOM_GPI_IMMEDIATE_DMA_LEN) >> + peripheral.flags |= QCOM_GPI_IMMEDIATE_DMA; >> } else if (xfer->rx_buf) { >> peripheral.cmd = SPI_RX; >> if (!(mas->cur_bits_per_word % MIN_WORD_LEN)) { >> diff --git a/include/linux/dma/qcom-gpi-dma.h b/include/linux/dma/qcom-gpi-dma.h >> index 6680dd1a43c6..0eb96e62a1f1 100644 >> --- a/include/linux/dma/qcom-gpi-dma.h >> +++ b/include/linux/dma/qcom-gpi-dma.h >> @@ -15,6 +15,11 @@ enum spi_transfer_cmd { >> SPI_DUPLEX, >> }; >> >> +#define QCOM_GPI_BLOCK_EVENT_IRQ BIT(0) > > Unrelated, please drop. Sure, done. 
> >> +#define QCOM_GPI_IMMEDIATE_DMA BIT(1) > > Can GPI driver deduce whether it should use immediate DMA based on the > transfer length? The protocol driver (e.g. SPI) decides whether immediate DMA should be used, based on the transfer length, and the GPI driver configures it accordingly. > >> + >> +#define QCOM_GPI_IMMEDIATE_DMA_LEN 8 >> + >> /** >> * struct gpi_spi_config - spi config for peripheral >> * >> @@ -30,6 +35,7 @@ enum spi_transfer_cmd { >> * @cs: chip select toggle >> * @set_config: set peripheral config >> * @rx_len: receive length for buffer >> + * @flags: flags for immediate dma and block event interrupt support >> */ >> struct gpi_spi_config { >> u8 set_config; >> @@ -44,6 +50,7 @@ struct gpi_spi_config { >> u32 clk_src; >> enum spi_transfer_cmd cmd; >> u32 rx_len; >> + u8 flags; >> }; >> >> enum i2c_op { >> -- >> 2.17.1 >> >
On Thu, Nov 28, 2024 at 07:06:22PM +0530, Jyothi Kumar Seerapu wrote: > > > On 11/22/2024 3:38 AM, Dmitry Baryshkov wrote: > > On Thu, Nov 21, 2024 at 05:22:01PM +0530, Jyothi Kumar Seerapu wrote: > > > The DMA TRE(Transfer ring element) buffer contains the DMA > > > buffer address. Accessing data from this address can cause > > > significant delays in SPI transfers, which can be mitigated to > > > some extent by utilizing immediate DMA support. > > > > > > QCOM GPI DMA hardware supports an immediate DMA feature for data > > > up to 8 bytes, storing the data directly in the DMA TRE buffer > > > instead of the DMA buffer address. This enhancement enables faster > > > SPI data transfers. > > > > Is it supported on all GPI DMA platforms, starting from SDM845? > Yes, it supported on all GPI DMA platforms. > > > > > > > > This optimization reduces the average transfer time from 25 us to > > > 16 us for a single SPI transfer of 8 bytes length, with a clock > > > frequency of 50 MHz. > > > > > > Signed-off-by: Jyothi Kumar Seerapu <quic_jseerapu@quicinc.com> > > > --- > > > drivers/dma/qcom/gpi.c | 32 +++++++++++++++++++++++++++----- > > > drivers/spi/spi-geni-qcom.c | 7 +++++++ > > > include/linux/dma/qcom-gpi-dma.h | 7 +++++++ > > > > How is this supposed to be merged? Please try to separate the patches by > > the subsystem, letting maintainers to handle possible dependencies. > > > Sure, separated the patches in V2. 
> > > 3 files changed, 41 insertions(+), 5 deletions(-) > > > > > > diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c > > > index 52a7c8f2498f..a8df1e835e27 100644 > > > --- a/drivers/dma/qcom/gpi.c > > > +++ b/drivers/dma/qcom/gpi.c > > > @@ -27,6 +27,7 @@ > > > #define TRE_FLAGS_IEOT BIT(9) > > > #define TRE_FLAGS_BEI BIT(10) > > > #define TRE_FLAGS_LINK BIT(11) > > > +#define TRE_FLAGS_IMMEDIATE_DMA BIT(16) > > > #define TRE_FLAGS_TYPE GENMASK(23, 16) > > > /* SPI CONFIG0 WD0 */ > > > @@ -64,6 +65,7 @@ > > > /* DMA TRE */ > > > #define TRE_DMA_LEN GENMASK(23, 0) > > > +#define TRE_DMA_IMMEDIATE_LEN GENMASK(3, 0) > > > /* Register offsets from gpi-top */ > > > #define GPII_n_CH_k_CNTXT_0_OFFS(n, k) (0x20000 + (0x4000 * (n)) + (0x80 * (k))) > > > @@ -1711,6 +1713,8 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc, > > > dma_addr_t address; > > > struct gpi_tre *tre; > > > unsigned int i; > > > + u8 *buf; > > > + int len = 0; > > > /* first create config tre if applicable */ > > > if (direction == DMA_MEM_TO_DEV && spi->set_config) { > > > @@ -1763,14 +1767,32 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc, > > > tre_idx++; > > > address = sg_dma_address(sgl); > > > - tre->dword[0] = lower_32_bits(address); > > > - tre->dword[1] = upper_32_bits(address); > > > + len = sg_dma_len(sgl); > > > - tre->dword[2] = u32_encode_bits(sg_dma_len(sgl), TRE_DMA_LEN); > > > + /* Support Immediate dma for write transfers for data length up to 8 bytes */ > > > + if ((spi->flags & QCOM_GPI_IMMEDIATE_DMA) && direction == DMA_MEM_TO_DEV) { > > > + buf = (u8 *)sg_virt(sgl); > > > - tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE); > > > - if (direction == DMA_MEM_TO_DEV) > > > + /* memcpy may not always be length of 8, hence pre-fill both dword's with 0 */ > > > + tre->dword[0] = 0; > > > + tre->dword[1] = 0; > > > + memcpy((u8 *)&tre->dword[0], buf, len); > > > > Drop all type conversions, they should not 
be necessary. memcpy() > > functions accepts void pointers. > Done > > > > > + > > > + tre->dword[2] = u32_encode_bits(len, TRE_DMA_IMMEDIATE_LEN); > > > + > > > + tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE); > > > tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT); > > > + tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IMMEDIATE_DMA); > > > + } else { > > > + tre->dword[0] = lower_32_bits(address); > > > + tre->dword[1] = upper_32_bits(address); > > > + > > > + tre->dword[2] = u32_encode_bits(len, TRE_DMA_LEN); > > > + > > > + tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE); > > > + if (direction == DMA_MEM_TO_DEV) > > > + tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT); > > > + } > > > for (i = 0; i < tre_idx; i++) > > > dev_dbg(dev, "TRE:%d %x:%x:%x:%x\n", i, desc->tre[i].dword[0], > > > diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c > > > index 768d7482102a..53c8f6b7f3c5 100644 > > > --- a/drivers/spi/spi-geni-qcom.c > > > +++ b/drivers/spi/spi-geni-qcom.c > > > @@ -472,11 +472,18 @@ static int setup_gsi_xfer(struct spi_transfer *xfer, struct spi_geni_master *mas > > > mas->cur_speed_hz = xfer->speed_hz; > > > } > > > + /* > > > + * Set QCOM_GPI_IMMEDIATE_DMA flag if transfer length up to 8 bytes. 
> > > + */ > > > if (xfer->tx_buf && xfer->rx_buf) { > > > peripheral.cmd = SPI_DUPLEX; > > > + if (xfer->len <= QCOM_GPI_IMMEDIATE_DMA_LEN) > > > + peripheral.flags |= QCOM_GPI_IMMEDIATE_DMA; > > > } else if (xfer->tx_buf) { > > > peripheral.cmd = SPI_TX; > > > peripheral.rx_len = 0; > > > + if (xfer->len <= QCOM_GPI_IMMEDIATE_DMA_LEN) > > > + peripheral.flags |= QCOM_GPI_IMMEDIATE_DMA; > > > } else if (xfer->rx_buf) { > > > peripheral.cmd = SPI_RX; > > > if (!(mas->cur_bits_per_word % MIN_WORD_LEN)) { > > > diff --git a/include/linux/dma/qcom-gpi-dma.h b/include/linux/dma/qcom-gpi-dma.h > > > index 6680dd1a43c6..0eb96e62a1f1 100644 > > > --- a/include/linux/dma/qcom-gpi-dma.h > > > +++ b/include/linux/dma/qcom-gpi-dma.h > > > @@ -15,6 +15,11 @@ enum spi_transfer_cmd { > > > SPI_DUPLEX, > > > }; > > > +#define QCOM_GPI_BLOCK_EVENT_IRQ BIT(0) > > > > Unrelated, please drop. > Sure, done. > > > > > +#define QCOM_GPI_IMMEDIATE_DMA BIT(1) > > > > Can GPI driver deduce whether it should use immediate DMA based on the > > transfer length? > protocol driver like SPI will update whether it should use immediate DMA or > not based on the transfer length and GPI driver will configure it > accordingly . You are describing your implementation. I asked if it is possible to be implemented other way around. Otherwise you are adding too much knowledge to the SPI driver. 
> > > > > + > > > +#define QCOM_GPI_IMMEDIATE_DMA_LEN 8 > > > + > > > /** > > > * struct gpi_spi_config - spi config for peripheral > > > * > > > @@ -30,6 +35,7 @@ enum spi_transfer_cmd { > > > * @cs: chip select toggle > > > * @set_config: set peripheral config > > > * @rx_len: receive length for buffer > > > + * @flags: flags for immediate dma and block event interrupt support > > > */ > > > struct gpi_spi_config { > > > u8 set_config; > > > @@ -44,6 +50,7 @@ struct gpi_spi_config { > > > u32 clk_src; > > > enum spi_transfer_cmd cmd; > > > u32 rx_len; > > > + u8 flags; > > > }; > > > enum i2c_op { > > > -- > > > 2.17.1 > > > > >
On 11/28/2024 7:32 PM, Dmitry Baryshkov wrote: > On Thu, Nov 28, 2024 at 07:06:22PM +0530, Jyothi Kumar Seerapu wrote: >> >> >> On 11/22/2024 3:38 AM, Dmitry Baryshkov wrote: >>> On Thu, Nov 21, 2024 at 05:22:01PM +0530, Jyothi Kumar Seerapu wrote: >>>> The DMA TRE(Transfer ring element) buffer contains the DMA >>>> buffer address. Accessing data from this address can cause >>>> significant delays in SPI transfers, which can be mitigated to >>>> some extent by utilizing immediate DMA support. >>>> >>>> QCOM GPI DMA hardware supports an immediate DMA feature for data >>>> up to 8 bytes, storing the data directly in the DMA TRE buffer >>>> instead of the DMA buffer address. This enhancement enables faster >>>> SPI data transfers. >>> >>> Is it supported on all GPI DMA platforms, starting from SDM845? >> Yes, it supported on all GPI DMA platforms. >>> >>>> >>>> This optimization reduces the average transfer time from 25 us to >>>> 16 us for a single SPI transfer of 8 bytes length, with a clock >>>> frequency of 50 MHz. >>>> >>>> Signed-off-by: Jyothi Kumar Seerapu <quic_jseerapu@quicinc.com> >>>> --- >>>> drivers/dma/qcom/gpi.c | 32 +++++++++++++++++++++++++++----- >>>> drivers/spi/spi-geni-qcom.c | 7 +++++++ >>>> include/linux/dma/qcom-gpi-dma.h | 7 +++++++ >>> >>> How is this supposed to be merged? Please try to separate the patches by >>> the subsystem, letting maintainers to handle possible dependencies. >>> >> Sure, separated the patches in V2. 
>>>> 3 files changed, 41 insertions(+), 5 deletions(-) >>>> >>>> diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c >>>> index 52a7c8f2498f..a8df1e835e27 100644 >>>> --- a/drivers/dma/qcom/gpi.c >>>> +++ b/drivers/dma/qcom/gpi.c >>>> @@ -27,6 +27,7 @@ >>>> #define TRE_FLAGS_IEOT BIT(9) >>>> #define TRE_FLAGS_BEI BIT(10) >>>> #define TRE_FLAGS_LINK BIT(11) >>>> +#define TRE_FLAGS_IMMEDIATE_DMA BIT(16) >>>> #define TRE_FLAGS_TYPE GENMASK(23, 16) >>>> /* SPI CONFIG0 WD0 */ >>>> @@ -64,6 +65,7 @@ >>>> /* DMA TRE */ >>>> #define TRE_DMA_LEN GENMASK(23, 0) >>>> +#define TRE_DMA_IMMEDIATE_LEN GENMASK(3, 0) >>>> /* Register offsets from gpi-top */ >>>> #define GPII_n_CH_k_CNTXT_0_OFFS(n, k) (0x20000 + (0x4000 * (n)) + (0x80 * (k))) >>>> @@ -1711,6 +1713,8 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc, >>>> dma_addr_t address; >>>> struct gpi_tre *tre; >>>> unsigned int i; >>>> + u8 *buf; >>>> + int len = 0; >>>> /* first create config tre if applicable */ >>>> if (direction == DMA_MEM_TO_DEV && spi->set_config) { >>>> @@ -1763,14 +1767,32 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc, >>>> tre_idx++; >>>> address = sg_dma_address(sgl); >>>> - tre->dword[0] = lower_32_bits(address); >>>> - tre->dword[1] = upper_32_bits(address); >>>> + len = sg_dma_len(sgl); >>>> - tre->dword[2] = u32_encode_bits(sg_dma_len(sgl), TRE_DMA_LEN); >>>> + /* Support Immediate dma for write transfers for data length up to 8 bytes */ >>>> + if ((spi->flags & QCOM_GPI_IMMEDIATE_DMA) && direction == DMA_MEM_TO_DEV) { >>>> + buf = (u8 *)sg_virt(sgl); >>>> - tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE); >>>> - if (direction == DMA_MEM_TO_DEV) >>>> + /* memcpy may not always be length of 8, hence pre-fill both dword's with 0 */ >>>> + tre->dword[0] = 0; >>>> + tre->dword[1] = 0; >>>> + memcpy((u8 *)&tre->dword[0], buf, len); >>> >>> Drop all type conversions, they should not be necessary. 
memcpy() >>> functions accepts void pointers. >> Done >>> >>>> + >>>> + tre->dword[2] = u32_encode_bits(len, TRE_DMA_IMMEDIATE_LEN); >>>> + >>>> + tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE); >>>> tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT); >>>> + tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IMMEDIATE_DMA); >>>> + } else { >>>> + tre->dword[0] = lower_32_bits(address); >>>> + tre->dword[1] = upper_32_bits(address); >>>> + >>>> + tre->dword[2] = u32_encode_bits(len, TRE_DMA_LEN); >>>> + >>>> + tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE); >>>> + if (direction == DMA_MEM_TO_DEV) >>>> + tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT); >>>> + } >>>> for (i = 0; i < tre_idx; i++) >>>> dev_dbg(dev, "TRE:%d %x:%x:%x:%x\n", i, desc->tre[i].dword[0], >>>> diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c >>>> index 768d7482102a..53c8f6b7f3c5 100644 >>>> --- a/drivers/spi/spi-geni-qcom.c >>>> +++ b/drivers/spi/spi-geni-qcom.c >>>> @@ -472,11 +472,18 @@ static int setup_gsi_xfer(struct spi_transfer *xfer, struct spi_geni_master *mas >>>> mas->cur_speed_hz = xfer->speed_hz; >>>> } >>>> + /* >>>> + * Set QCOM_GPI_IMMEDIATE_DMA flag if transfer length up to 8 bytes. 
>>>> + */ >>>> if (xfer->tx_buf && xfer->rx_buf) { >>>> peripheral.cmd = SPI_DUPLEX; >>>> + if (xfer->len <= QCOM_GPI_IMMEDIATE_DMA_LEN) >>>> + peripheral.flags |= QCOM_GPI_IMMEDIATE_DMA; >>>> } else if (xfer->tx_buf) { >>>> peripheral.cmd = SPI_TX; >>>> peripheral.rx_len = 0; >>>> + if (xfer->len <= QCOM_GPI_IMMEDIATE_DMA_LEN) >>>> + peripheral.flags |= QCOM_GPI_IMMEDIATE_DMA; >>>> } else if (xfer->rx_buf) { >>>> peripheral.cmd = SPI_RX; >>>> if (!(mas->cur_bits_per_word % MIN_WORD_LEN)) { >>>> diff --git a/include/linux/dma/qcom-gpi-dma.h b/include/linux/dma/qcom-gpi-dma.h >>>> index 6680dd1a43c6..0eb96e62a1f1 100644 >>>> --- a/include/linux/dma/qcom-gpi-dma.h >>>> +++ b/include/linux/dma/qcom-gpi-dma.h >>>> @@ -15,6 +15,11 @@ enum spi_transfer_cmd { >>>> SPI_DUPLEX, >>>> }; >>>> +#define QCOM_GPI_BLOCK_EVENT_IRQ BIT(0) >>> >>> Unrelated, please drop. >> Sure, done. >>> >>>> +#define QCOM_GPI_IMMEDIATE_DMA BIT(1) >>> >>> Can GPI driver deduce whether it should use immediate DMA based on the >>> transfer length? >> protocol driver like SPI will update whether it should use immediate DMA or >> not based on the transfer length and GPI driver will configure it >> accordingly . > > You are describing your implementation. I asked if it is possible to be > implemented other way around. Otherwise you are adding too much > knowledge to the SPI driver. Yeah got it. It is possible to check in GPI driver only whether to use immediate DMA and handle it. 
> >>> >>>> + >>>> +#define QCOM_GPI_IMMEDIATE_DMA_LEN 8 >>>> + >>>> /** >>>> * struct gpi_spi_config - spi config for peripheral >>>> * >>>> @@ -30,6 +35,7 @@ enum spi_transfer_cmd { >>>> * @cs: chip select toggle >>>> * @set_config: set peripheral config >>>> * @rx_len: receive length for buffer >>>> + * @flags: flags for immediate dma and block event interrupt support >>>> */ >>>> struct gpi_spi_config { >>>> u8 set_config; >>>> @@ -44,6 +50,7 @@ struct gpi_spi_config { >>>> u32 clk_src; >>>> enum spi_transfer_cmd cmd; >>>> u32 rx_len; >>>> + u8 flags; >>>> }; >>>> enum i2c_op { >>>> -- >>>> 2.17.1 >>>> >>> >
diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c index 52a7c8f2498f..a8df1e835e27 100644 --- a/drivers/dma/qcom/gpi.c +++ b/drivers/dma/qcom/gpi.c @@ -27,6 +27,7 @@ #define TRE_FLAGS_IEOT BIT(9) #define TRE_FLAGS_BEI BIT(10) #define TRE_FLAGS_LINK BIT(11) +#define TRE_FLAGS_IMMEDIATE_DMA BIT(16) #define TRE_FLAGS_TYPE GENMASK(23, 16) /* SPI CONFIG0 WD0 */ @@ -64,6 +65,7 @@ /* DMA TRE */ #define TRE_DMA_LEN GENMASK(23, 0) +#define TRE_DMA_IMMEDIATE_LEN GENMASK(3, 0) /* Register offsets from gpi-top */ #define GPII_n_CH_k_CNTXT_0_OFFS(n, k) (0x20000 + (0x4000 * (n)) + (0x80 * (k))) @@ -1711,6 +1713,8 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc, dma_addr_t address; struct gpi_tre *tre; unsigned int i; + u8 *buf; + int len = 0; /* first create config tre if applicable */ if (direction == DMA_MEM_TO_DEV && spi->set_config) { @@ -1763,14 +1767,32 @@ static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc, tre_idx++; address = sg_dma_address(sgl); - tre->dword[0] = lower_32_bits(address); - tre->dword[1] = upper_32_bits(address); + len = sg_dma_len(sgl); - tre->dword[2] = u32_encode_bits(sg_dma_len(sgl), TRE_DMA_LEN); + /* Support Immediate dma for write transfers for data length up to 8 bytes */ + if ((spi->flags & QCOM_GPI_IMMEDIATE_DMA) && direction == DMA_MEM_TO_DEV) { + buf = (u8 *)sg_virt(sgl); - tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE); - if (direction == DMA_MEM_TO_DEV) + /* memcpy may not always be length of 8, hence pre-fill both dword's with 0 */ + tre->dword[0] = 0; + tre->dword[1] = 0; + memcpy((u8 *)&tre->dword[0], buf, len); + + tre->dword[2] = u32_encode_bits(len, TRE_DMA_IMMEDIATE_LEN); + + tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE); tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT); + tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IMMEDIATE_DMA); + } else { + tre->dword[0] = lower_32_bits(address); + tre->dword[1] = upper_32_bits(address); + + 
tre->dword[2] = u32_encode_bits(len, TRE_DMA_LEN); + + tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE); + if (direction == DMA_MEM_TO_DEV) + tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT); + } for (i = 0; i < tre_idx; i++) dev_dbg(dev, "TRE:%d %x:%x:%x:%x\n", i, desc->tre[i].dword[0], diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c index 768d7482102a..53c8f6b7f3c5 100644 --- a/drivers/spi/spi-geni-qcom.c +++ b/drivers/spi/spi-geni-qcom.c @@ -472,11 +472,18 @@ static int setup_gsi_xfer(struct spi_transfer *xfer, struct spi_geni_master *mas mas->cur_speed_hz = xfer->speed_hz; } + /* + * Set QCOM_GPI_IMMEDIATE_DMA flag if transfer length up to 8 bytes. + */ if (xfer->tx_buf && xfer->rx_buf) { peripheral.cmd = SPI_DUPLEX; + if (xfer->len <= QCOM_GPI_IMMEDIATE_DMA_LEN) + peripheral.flags |= QCOM_GPI_IMMEDIATE_DMA; } else if (xfer->tx_buf) { peripheral.cmd = SPI_TX; peripheral.rx_len = 0; + if (xfer->len <= QCOM_GPI_IMMEDIATE_DMA_LEN) + peripheral.flags |= QCOM_GPI_IMMEDIATE_DMA; } else if (xfer->rx_buf) { peripheral.cmd = SPI_RX; if (!(mas->cur_bits_per_word % MIN_WORD_LEN)) { diff --git a/include/linux/dma/qcom-gpi-dma.h b/include/linux/dma/qcom-gpi-dma.h index 6680dd1a43c6..0eb96e62a1f1 100644 --- a/include/linux/dma/qcom-gpi-dma.h +++ b/include/linux/dma/qcom-gpi-dma.h @@ -15,6 +15,11 @@ enum spi_transfer_cmd { SPI_DUPLEX, }; +#define QCOM_GPI_BLOCK_EVENT_IRQ BIT(0) +#define QCOM_GPI_IMMEDIATE_DMA BIT(1) + +#define QCOM_GPI_IMMEDIATE_DMA_LEN 8 + /** * struct gpi_spi_config - spi config for peripheral * @@ -30,6 +35,7 @@ enum spi_transfer_cmd { * @cs: chip select toggle * @set_config: set peripheral config * @rx_len: receive length for buffer + * @flags: flags for immediate dma and block event interrupt support */ struct gpi_spi_config { u8 set_config; @@ -44,6 +50,7 @@ struct gpi_spi_config { u32 clk_src; enum spi_transfer_cmd cmd; u32 rx_len; + u8 flags; }; enum i2c_op {
The DMA TRE (Transfer Ring Element) buffer contains the DMA buffer address. Accessing data from this address can cause significant delays in SPI transfers, which can be mitigated to some extent by utilizing immediate DMA support. QCOM GPI DMA hardware supports an immediate DMA feature for data up to 8 bytes, storing the data directly in the DMA TRE buffer instead of the DMA buffer address. This enhancement enables faster SPI data transfers. This optimization reduces the average transfer time from 25 us to 16 us for a single SPI transfer of 8 bytes in length, with a clock frequency of 50 MHz. Signed-off-by: Jyothi Kumar Seerapu <quic_jseerapu@quicinc.com> --- drivers/dma/qcom/gpi.c | 32 +++++++++++++++++++++++++++----- drivers/spi/spi-geni-qcom.c | 7 +++++++ include/linux/dma/qcom-gpi-dma.h | 7 +++++++ 3 files changed, 41 insertions(+), 5 deletions(-)