diff mbox series

[v2,4/5] spi: axi-spi-engine: move message compile to optimize_message

Message ID 20240219-mainline-spi-precook-message-v2-4-4a762c6701b9@baylibre.com (mailing list archive)
State New, archived
Headers show
Series spi: add support for pre-cooking messages | expand

Commit Message

David Lechner Feb. 19, 2024, 10:33 p.m. UTC
In the AXI SPI Engine driver, compiling the message is an expensive
operation. Previously, it was done per message transfer in the
prepare_message hook. This patch moves the message compile to the
optimize_message hook so that it is only done once per message in
cases where the peripheral driver calls spi_optimize_message().

This can be a significant performance improvement for some peripherals.
For example, the ad7380 driver saw a 13% improvement in throughput
when using the AXI SPI Engine driver with this patch.

We now need two message states: one for the optimization stage, which
doesn't change for the lifetime of the message, and one that is reset on
each transfer to manage the current transfer state. The old msg->state
is therefore split into msg->opt_state and spi_engine->msg_state. The
latter is now included in the driver struct, since only one current
message at a time can ever use it. Because it is in a hot path, avoiding
allocating a new one on each message transfer saves a few CPU cycles and
lets us get rid of the prepare_message callback.

Signed-off-by: David Lechner <dlechner@baylibre.com>
---

v2 changes: none

 drivers/spi/spi-axi-spi-engine.c | 40 +++++++++++++++++-----------------------
 1 file changed, 17 insertions(+), 23 deletions(-)

Comments

Nuno Sá Feb. 20, 2024, 10:45 a.m. UTC | #1
On Mon, 2024-02-19 at 16:33 -0600, David Lechner wrote:
> In the AXI SPI Engine driver, compiling the message is an expensive
> operation. Previously, it was done per message transfer in the
> prepare_message hook. This patch moves the message compile to the
> optimize_message hook so that it is only done once per message in
> cases where the peripheral driver calls spi_optimize_message().
> 
> This can be a significant performance improvement for some peripherals.
> For example, the ad7380 driver saw a 13% improvement in throughput
> when using the AXI SPI Engine driver with this patch.
> 
> Since we now need two message states, one for the optimization stage
> that doesn't change for the lifetime of the message and one that is
> reset on each transfer for managing the current transfer state, the old
> msg->state is split into msg->opt_state and spi_engine->msg_state. The
> latter is included in the driver struct now since there is only one
> current message at a time that can ever use it and it is in a hot path
> so avoiding allocating a new one on each message transfer saves a few
> cpu cycles and lets us get rid of the prepare_message callback.
> 
> Signed-off-by: David Lechner <dlechner@baylibre.com>
> ---

Reviewed-by: Nuno Sa <nuno.sa@analog.com>

> 
> v2 changes: none
> 
>  drivers/spi/spi-axi-spi-engine.c | 40 +++++++++++++++++-----------------------
>  1 file changed, 17 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-engine.c
> index ca66d202f0e2..6177c1a8d56e 100644
> --- a/drivers/spi/spi-axi-spi-engine.c
> +++ b/drivers/spi/spi-axi-spi-engine.c
> @@ -109,6 +109,7 @@ struct spi_engine {
>  	spinlock_t lock;
>  
>  	void __iomem *base;
> +	struct spi_engine_message_state msg_state;
>  	struct completion msg_complete;
>  	unsigned int int_enable;
>  };
> @@ -499,17 +500,11 @@ static irqreturn_t spi_engine_irq(int irq, void *devid)
>  	return IRQ_HANDLED;
>  }
>  
> -static int spi_engine_prepare_message(struct spi_controller *host,
> -				      struct spi_message *msg)
> +static int spi_engine_optimize_message(struct spi_message *msg)
>  {
>  	struct spi_engine_program p_dry, *p;
> -	struct spi_engine_message_state *st;
>  	size_t size;
>  
> -	st = kzalloc(sizeof(*st), GFP_KERNEL);
> -	if (!st)
> -		return -ENOMEM;
> -
>  	spi_engine_precompile_message(msg);
>  
>  	p_dry.length = 0;
> @@ -517,31 +512,22 @@ static int spi_engine_prepare_message(struct spi_controller *host,
>  
>  	size = sizeof(*p->instructions) * (p_dry.length + 1);
>  	p = kzalloc(sizeof(*p) + size, GFP_KERNEL);
> -	if (!p) {
> -		kfree(st);
> +	if (!p)
>  		return -ENOMEM;
> -	}
>  
>  	spi_engine_compile_message(msg, false, p);
>  
>  	spi_engine_program_add_cmd(p, false, SPI_ENGINE_CMD_SYNC(
>  						AXI_SPI_ENGINE_CUR_MSG_SYNC_ID));
>  
> -	st->p = p;
> -	st->cmd_buf = p->instructions;
> -	st->cmd_length = p->length;
> -	msg->state = st;
> +	msg->opt_state = p;
>  
>  	return 0;
>  }
>  
> -static int spi_engine_unprepare_message(struct spi_controller *host,
> -					struct spi_message *msg)
> +static int spi_engine_unoptimize_message(struct spi_message *msg)
>  {
> -	struct spi_engine_message_state *st = msg->state;
> -
> -	kfree(st->p);
> -	kfree(st);
> +	kfree(msg->opt_state);
>  
>  	return 0;
>  }
> @@ -550,10 +536,18 @@ static int spi_engine_transfer_one_message(struct spi_controller *host,
>  	struct spi_message *msg)
>  {
>  	struct spi_engine *spi_engine = spi_controller_get_devdata(host);
> -	struct spi_engine_message_state *st = msg->state;
> +	struct spi_engine_message_state *st = &spi_engine->msg_state;
> +	struct spi_engine_program *p = msg->opt_state;
>  	unsigned int int_enable = 0;
>  	unsigned long flags;
>  
> +	/* reinitialize message state for this transfer */
> +	memset(st, 0, sizeof(*st));
> +	st->p = p;
> +	st->cmd_buf = p->instructions;
> +	st->cmd_length = p->length;
> +	msg->state = st;
> +
>  	reinit_completion(&spi_engine->msg_complete);
>  
>  	spin_lock_irqsave(&spi_engine->lock, flags);
> @@ -658,8 +652,8 @@ static int spi_engine_probe(struct platform_device *pdev)
>  	host->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32);
>  	host->max_speed_hz = clk_get_rate(spi_engine->ref_clk) / 2;
>  	host->transfer_one_message = spi_engine_transfer_one_message;
> -	host->prepare_message = spi_engine_prepare_message;
> -	host->unprepare_message = spi_engine_unprepare_message;
> +	host->optimize_message = spi_engine_optimize_message;
> +	host->unoptimize_message = spi_engine_unoptimize_message;
>  	host->num_chipselect = 8;
>  
>  	if (host->max_speed_hz == 0)
>
Jonathan Cameron Feb. 24, 2024, 4:51 p.m. UTC | #2
On Mon, 19 Feb 2024 16:33:21 -0600
David Lechner <dlechner@baylibre.com> wrote:

> In the AXI SPI Engine driver, compiling the message is an expensive
> operation. Previously, it was done per message transfer in the
> prepare_message hook. This patch moves the message compile to the
> optimize_message hook so that it is only done once per message in
> cases where the peripheral driver calls spi_optimize_message().
> 
> This can be a significant performance improvement for some peripherals.
> For example, the ad7380 driver saw a 13% improvement in throughput
> when using the AXI SPI Engine driver with this patch.
> 
> Since we now need two message states, one for the optimization stage
> that doesn't change for the lifetime of the message and one that is
> reset on each transfer for managing the current transfer state, the old
> msg->state is split into msg->opt_state and spi_engine->msg_state. The
> latter is included in the driver struct now since there is only one
> current message at a time that can ever use it and it is in a hot path
> so avoiding allocating a new one on each message transfer saves a few
> cpu cycles and lets us get rid of the prepare_message callback.
> 
> Signed-off-by: David Lechner <dlechner@baylibre.com>
Whilst I'm not familiar with this driver, from a quick look at this
patch and the driver code, it looks fine to me. So FWIW
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
diff mbox series

Patch

diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-engine.c
index ca66d202f0e2..6177c1a8d56e 100644
--- a/drivers/spi/spi-axi-spi-engine.c
+++ b/drivers/spi/spi-axi-spi-engine.c
@@ -109,6 +109,7 @@  struct spi_engine {
 	spinlock_t lock;
 
 	void __iomem *base;
+	struct spi_engine_message_state msg_state;
 	struct completion msg_complete;
 	unsigned int int_enable;
 };
@@ -499,17 +500,11 @@  static irqreturn_t spi_engine_irq(int irq, void *devid)
 	return IRQ_HANDLED;
 }
 
-static int spi_engine_prepare_message(struct spi_controller *host,
-				      struct spi_message *msg)
+static int spi_engine_optimize_message(struct spi_message *msg)
 {
 	struct spi_engine_program p_dry, *p;
-	struct spi_engine_message_state *st;
 	size_t size;
 
-	st = kzalloc(sizeof(*st), GFP_KERNEL);
-	if (!st)
-		return -ENOMEM;
-
 	spi_engine_precompile_message(msg);
 
 	p_dry.length = 0;
@@ -517,31 +512,22 @@  static int spi_engine_prepare_message(struct spi_controller *host,
 
 	size = sizeof(*p->instructions) * (p_dry.length + 1);
 	p = kzalloc(sizeof(*p) + size, GFP_KERNEL);
-	if (!p) {
-		kfree(st);
+	if (!p)
 		return -ENOMEM;
-	}
 
 	spi_engine_compile_message(msg, false, p);
 
 	spi_engine_program_add_cmd(p, false, SPI_ENGINE_CMD_SYNC(
 						AXI_SPI_ENGINE_CUR_MSG_SYNC_ID));
 
-	st->p = p;
-	st->cmd_buf = p->instructions;
-	st->cmd_length = p->length;
-	msg->state = st;
+	msg->opt_state = p;
 
 	return 0;
 }
 
-static int spi_engine_unprepare_message(struct spi_controller *host,
-					struct spi_message *msg)
+static int spi_engine_unoptimize_message(struct spi_message *msg)
 {
-	struct spi_engine_message_state *st = msg->state;
-
-	kfree(st->p);
-	kfree(st);
+	kfree(msg->opt_state);
 
 	return 0;
 }
@@ -550,10 +536,18 @@  static int spi_engine_transfer_one_message(struct spi_controller *host,
 	struct spi_message *msg)
 {
 	struct spi_engine *spi_engine = spi_controller_get_devdata(host);
-	struct spi_engine_message_state *st = msg->state;
+	struct spi_engine_message_state *st = &spi_engine->msg_state;
+	struct spi_engine_program *p = msg->opt_state;
 	unsigned int int_enable = 0;
 	unsigned long flags;
 
+	/* reinitialize message state for this transfer */
+	memset(st, 0, sizeof(*st));
+	st->p = p;
+	st->cmd_buf = p->instructions;
+	st->cmd_length = p->length;
+	msg->state = st;
+
 	reinit_completion(&spi_engine->msg_complete);
 
 	spin_lock_irqsave(&spi_engine->lock, flags);
@@ -658,8 +652,8 @@  static int spi_engine_probe(struct platform_device *pdev)
 	host->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32);
 	host->max_speed_hz = clk_get_rate(spi_engine->ref_clk) / 2;
 	host->transfer_one_message = spi_engine_transfer_one_message;
-	host->prepare_message = spi_engine_prepare_message;
-	host->unprepare_message = spi_engine_unprepare_message;
+	host->optimize_message = spi_engine_optimize_message;
+	host->unoptimize_message = spi_engine_unoptimize_message;
 	host->num_chipselect = 8;
 
 	if (host->max_speed_hz == 0)