diff mbox series

[v3] firmware: arm_scmi: Queue in scmi layer for mailbox implementation

Message ID 20241014160717.1678953-1-justin.chen@broadcom.com (mailing list archive)
State New, archived
Headers show
Series [v3] firmware: arm_scmi: Queue in scmi layer for mailbox implementation | expand

Commit Message

Justin Chen Oct. 14, 2024, 4:07 p.m. UTC
send_message() does not block in the MBOX implementation. This is
because the mailbox layer has its own queue. However, this confuses
the per-xfer timeouts, as they all start their timeout ticks in
parallel.

Consider a case where the xfer timeout is 30ms and a SCMI transaction
takes 25ms.

0ms: Message #0 is queued in mailbox layer and sent out, then sits
at scmi_wait_for_message_response() with a timeout of 30ms
1ms: Message #1 is queued in mailbox layer but not sent out yet.
Since send_message() doesn't block, it also sits at
scmi_wait_for_message_response() with a timeout of 30ms
...
25ms: Message #0 is completed, txdone is called and Message #1 is
sent out
31ms: Message #1 times out since the count started at 1ms, even
though it has only been in flight for 6ms.

Fixes: 5c8a47a5a91d ("firmware: arm_scmi: Make scmi core independent of the transport type")
Signed-off-by: Justin Chen <justin.chen@broadcom.com>
---

Changes in v3:
- Changed Fixes tag
- Fixed mutex imbalance
- Add Doxygen comment
- Fixed spelling mistake
- Moved mutex init

Changes in v2:
- Added Fixes tag
- Improved commit message to better capture the issue

 .../firmware/arm_scmi/transports/mailbox.c    | 28 +++++++++++++------
 1 file changed, 19 insertions(+), 9 deletions(-)

Comments

Justin Chen Oct. 14, 2024, 4:12 p.m. UTC | #1
On 10/14/24 9:07 AM, Justin Chen wrote:
> send_message() does not block in the MBOX implementation. This is
> because the mailbox layer has its own queue. However, this confuses
> the per xfer timeouts as they all start their timeout ticks in
> parallel.
> 
> Consider a case where the xfer timeout is 30ms and a SCMI transaction
> takes 25ms.
> 
> 0ms: Message #0 is queued in mailbox layer and sent out, then sits
> at scmi_wait_for_message_response() with a timeout of 30ms
> 1ms: Message #1 is queued in mailbox layer but not sent out yet.
> Since send_message() doesn't block, it also sits at
> scmi_wait_for_message_response() with a timeout of 30ms
> ...
> 25ms: Message #0 is completed, txdone is called and Message #1 is
> sent out
> 31ms: Message #1 times out since the count started at 1ms. Even
> though it has only been inflight for 6ms.
> 
> Fixes: 5c8a47a5a91d ("firmware: arm_scmi: Make scmi core independent of the transport type")
> Signed-off-by: Justin Chen <justin.chen@broadcom.com>

Whoops, forgot to add these.

Reviewed-by: Cristian Marussi <cristian.marussi@arm.com>
Tested-by: Cristian Marussi <cristian.marussi@arm.com>

> ---
> 
> Changes in v3:
> - Changed Fixes tag
> - Fixed mutex imbalance
> - Add Doxygen comment
> - Fixed spelling mistake
> - Moved mutex init
> 
> Changes in v2
> - Added Fixes tag
> - Improved commit message to better capture the issue
> 
>   .../firmware/arm_scmi/transports/mailbox.c    | 28 +++++++++++++------
>   1 file changed, 19 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/firmware/arm_scmi/transports/mailbox.c b/drivers/firmware/arm_scmi/transports/mailbox.c
> index 1a754dee24f7..af08fb5cc72f 100644
> --- a/drivers/firmware/arm_scmi/transports/mailbox.c
> +++ b/drivers/firmware/arm_scmi/transports/mailbox.c
> @@ -25,6 +25,7 @@
>    * @chan_platform_receiver: Optional Platform Receiver mailbox unidirectional channel
>    * @cinfo: SCMI channel info
>    * @shmem: Transmit/Receive shared memory area
> + * @chan_lock: Lock that prevents multiple xfers from being queued
>    */
>   struct scmi_mailbox {
>   	struct mbox_client cl;
> @@ -33,6 +34,7 @@ struct scmi_mailbox {
>   	struct mbox_chan *chan_platform_receiver;
>   	struct scmi_chan_info *cinfo;
>   	struct scmi_shared_mem __iomem *shmem;
> +	struct mutex chan_lock;
>   };
>   
>   #define client_to_scmi_mailbox(c) container_of(c, struct scmi_mailbox, cl)
> @@ -238,6 +240,7 @@ static int mailbox_chan_setup(struct scmi_chan_info *cinfo, struct device *dev,
>   
>   	cinfo->transport_info = smbox;
>   	smbox->cinfo = cinfo;
> +	mutex_init(&smbox->chan_lock);
>   
>   	return 0;
>   }
> @@ -267,13 +270,23 @@ static int mailbox_send_message(struct scmi_chan_info *cinfo,
>   	struct scmi_mailbox *smbox = cinfo->transport_info;
>   	int ret;
>   
> +	/*
> +	 * The mailbox layer has its own queue. However the mailbox queue confuses
> +	 * the per message SCMI timeouts since the clock starts when the message is
> +	 * submitted into the mailbox queue. So when multiple messages are queued up
> +	 * the clock starts on all messages instead of only the one inflight.
> +	 */
> +	mutex_lock(&smbox->chan_lock);
> +
>   	ret = mbox_send_message(smbox->chan, xfer);
>   
>   	/* mbox_send_message returns non-negative value on success, so reset */
> -	if (ret > 0)
> -		ret = 0;
> +	if (ret < 0) {
> +		mutex_unlock(&smbox->chan_lock);
> +		return ret;
> +	}
>   
> -	return ret;
> +	return 0;
>   }
>   
>   static void mailbox_mark_txdone(struct scmi_chan_info *cinfo, int ret,
> @@ -281,13 +294,10 @@ static void mailbox_mark_txdone(struct scmi_chan_info *cinfo, int ret,
>   {
>   	struct scmi_mailbox *smbox = cinfo->transport_info;
>   
> -	/*
> -	 * NOTE: we might prefer not to need the mailbox ticker to manage the
> -	 * transfer queueing since the protocol layer queues things by itself.
> -	 * Unfortunately, we have to kick the mailbox framework after we have
> -	 * received our message.
> -	 */
>   	mbox_client_txdone(smbox->chan, ret);
> +
> +	/* Release channel */
> +	mutex_unlock(&smbox->chan_lock);
>   }
>   
>   static void mailbox_fetch_response(struct scmi_chan_info *cinfo,
Sudeep Holla Oct. 15, 2024, 1:17 p.m. UTC | #2
On Mon, 14 Oct 2024 09:07:17 -0700, Justin Chen wrote:
> send_message() does not block in the MBOX implementation. This is
> because the mailbox layer has its own queue. However, this confuses
> the per xfer timeouts as they all start their timeout ticks in
> parallel.
> 
> Consider a case where the xfer timeout is 30ms and a SCMI transaction
> takes 25ms.
> 
> [...]

Applied to sudeep.holla/linux (for-next/scmi/fixes), thanks!

[1/1] firmware: arm_scmi: Queue in scmi layer for mailbox implementation
      https://git.kernel.org/sudeep.holla/c/da1642bc97c4
--
Regards,
Sudeep
diff mbox series

Patch

diff --git a/drivers/firmware/arm_scmi/transports/mailbox.c b/drivers/firmware/arm_scmi/transports/mailbox.c
index 1a754dee24f7..af08fb5cc72f 100644
--- a/drivers/firmware/arm_scmi/transports/mailbox.c
+++ b/drivers/firmware/arm_scmi/transports/mailbox.c
@@ -25,6 +25,7 @@ 
  * @chan_platform_receiver: Optional Platform Receiver mailbox unidirectional channel
  * @cinfo: SCMI channel info
  * @shmem: Transmit/Receive shared memory area
+ * @chan_lock: Lock that prevents multiple xfers from being queued
  */
 struct scmi_mailbox {
 	struct mbox_client cl;
@@ -33,6 +34,7 @@  struct scmi_mailbox {
 	struct mbox_chan *chan_platform_receiver;
 	struct scmi_chan_info *cinfo;
 	struct scmi_shared_mem __iomem *shmem;
+	struct mutex chan_lock;
 };
 
 #define client_to_scmi_mailbox(c) container_of(c, struct scmi_mailbox, cl)
@@ -238,6 +240,7 @@  static int mailbox_chan_setup(struct scmi_chan_info *cinfo, struct device *dev,
 
 	cinfo->transport_info = smbox;
 	smbox->cinfo = cinfo;
+	mutex_init(&smbox->chan_lock);
 
 	return 0;
 }
@@ -267,13 +270,23 @@  static int mailbox_send_message(struct scmi_chan_info *cinfo,
 	struct scmi_mailbox *smbox = cinfo->transport_info;
 	int ret;
 
+	/*
+	 * The mailbox layer has its own queue. However the mailbox queue confuses
+	 * the per message SCMI timeouts since the clock starts when the message is
+	 * submitted into the mailbox queue. So when multiple messages are queued up
+	 * the clock starts on all messages instead of only the one inflight.
+	 */
+	mutex_lock(&smbox->chan_lock);
+
 	ret = mbox_send_message(smbox->chan, xfer);
 
 	/* mbox_send_message returns non-negative value on success, so reset */
-	if (ret > 0)
-		ret = 0;
+	if (ret < 0) {
+		mutex_unlock(&smbox->chan_lock);
+		return ret;
+	}
 
-	return ret;
+	return 0;
 }
 
 static void mailbox_mark_txdone(struct scmi_chan_info *cinfo, int ret,
@@ -281,13 +294,10 @@  static void mailbox_mark_txdone(struct scmi_chan_info *cinfo, int ret,
 {
 	struct scmi_mailbox *smbox = cinfo->transport_info;
 
-	/*
-	 * NOTE: we might prefer not to need the mailbox ticker to manage the
-	 * transfer queueing since the protocol layer queues things by itself.
-	 * Unfortunately, we have to kick the mailbox framework after we have
-	 * received our message.
-	 */
 	mbox_client_txdone(smbox->chan, ret);
+
+	/* Release channel */
+	mutex_unlock(&smbox->chan_lock);
 }
 
 static void mailbox_fetch_response(struct scmi_chan_info *cinfo,