
[v3] iommu/arm-smmu-v3: permit users to disable MSI polling

Message ID 20200801074703.17108-1-song.bao.hua@hisilicon.com (mailing list archive)
State New, archived
Series [v3] iommu/arm-smmu-v3: permit users to disable MSI polling

Commit Message

Song Bao Hua (Barry Song) Aug. 1, 2020, 7:47 a.m. UTC
Polling by MSI isn't necessarily faster than polling by SEV. Tests on
hi1620 show that hns3 100G NIC network throughput can improve from 25Gbps to
27Gbps if we disable MSI polling while running 16 netperf threads sending
UDP packets of size 32KB.
This patch provides a command line option so that users can decide to
use MSI polling or not based on their tests.

Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
---
 -v3:
  * rebase on top of linux-next as arm-smmu-v3.c has moved;
  * provide a command line option

 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)
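
For reference, a usage sketch (not part of the patch): disable_msipolling is a bool module
parameter of the arm-smmu-v3 driver, so with the driver built in it would be selected on the
kernel command line, e.g.

	arm_smmu_v3.disable_msipolling=1

where the arm_smmu_v3 prefix is an assumption based on the module name. The parameter is
declared S_IRUGO, so its current value can be read back under
/sys/module/arm_smmu_v3/parameters/ but not changed at runtime.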

Comments

John Garry Aug. 3, 2020, 3:33 p.m. UTC | #1
On 01/08/2020 08:47, Barry Song wrote:
> Polling by MSI isn't necessarily faster than polling by SEV. Tests on
> hi1620 show that hns3 100G NIC network throughput can improve from 25Gbps to
> 27Gbps if we disable MSI polling while running 16 netperf threads sending
> UDP packets of size 32KB.

BTW, do we have any more results than this? This is just one scenario.

How about your micro-benchmark, which allows you to set the number of CPUs?

Thanks,
John

> This patch provides a command line option so that users can decide to
> use MSI polling or not based on their tests.
> 
> Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
> ---
>   -v3:
>    * rebase on top of linux-next as arm-smmu-v3.c has moved;
>    * provide a command line option
> 
>   drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 18 ++++++++++++++----
>   1 file changed, 14 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index 7196207be7ea..89d3cb391fef 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -418,6 +418,11 @@ module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
>   MODULE_PARM_DESC(disable_bypass,
>   	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
>   
> +static bool disable_msipolling;
> +module_param_named(disable_msipolling, disable_msipolling, bool, S_IRUGO);
> +MODULE_PARM_DESC(disable_msipolling,
> +	"Disable MSI-based polling for CMD_SYNC completion.");
> +
>   enum pri_resp {
>   	PRI_RESP_DENY = 0,
>   	PRI_RESP_FAIL = 1,
> @@ -980,6 +985,13 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
>   	return 0;
>   }
>   
> +static bool arm_smmu_use_msipolling(struct arm_smmu_device *smmu)
> +{
> +	return !disable_msipolling &&
> +	       smmu->features & ARM_SMMU_FEAT_COHERENCY &&
> +	       smmu->features & ARM_SMMU_FEAT_MSI;
> +}
> +
>   static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
>   					 u32 prod)
>   {
> @@ -992,8 +1004,7 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
>   	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
>   	 * payload, so the write will zero the entire command on that platform.
>   	 */
> -	if (smmu->features & ARM_SMMU_FEAT_MSI &&
> -	    smmu->features & ARM_SMMU_FEAT_COHERENCY) {
> +	if (arm_smmu_use_msipolling(smmu)) {
>   		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
>   				   q->ent_dwords * 8;
>   	}
> @@ -1332,8 +1343,7 @@ static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
>   static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
>   					 struct arm_smmu_ll_queue *llq)
>   {
> -	if (smmu->features & ARM_SMMU_FEAT_MSI &&
> -	    smmu->features & ARM_SMMU_FEAT_COHERENCY)
> +	if (arm_smmu_use_msipolling(smmu))
>   		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
>   
>   	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
>
Song Bao Hua (Barry Song) Aug. 3, 2020, 8:40 p.m. UTC | #2
> -----Original Message-----
> From: John Garry
> Sent: Tuesday, August 4, 2020 3:34 AM
> To: Song Bao Hua (Barry Song) <song.bao.hua@hisilicon.com>; will@kernel.org;
> robin.murphy@arm.com; joro@8bytes.org; iommu@lists.linux-foundation.org
> Cc: Zengtao (B) <prime.zeng@hisilicon.com>;
> linux-arm-kernel@lists.infradead.org
> Subject: Re: [PATCH v3] iommu/arm-smmu-v3: permit users to disable MSI
> polling
> 
> On 01/08/2020 08:47, Barry Song wrote:
> > Polling by MSI isn't necessarily faster than polling by SEV. Tests on
> > hi1620 show that hns3 100G NIC network throughput can improve from 25Gbps to
> > 27Gbps if we disable MSI polling while running 16 netperf threads sending
> > UDP packets of size 32KB.
> 
> BTW, do we have any more results than this? This is just one scenario.
> 

John, it is more than one scenario. A micro-benchmark shows that polling by SEV has lower
latency than polling by MSI. This motivated me to verify with a real scenario. For this
network case, if we set the thread count to 1 rather than 16, network TX throughput can
improve from 7Gbps to 7.7Gbps.

> How about your micro-benchmark, which allows you to set the number of
> CPUs?

The micro-benchmark works like this:
 * send a CMD_SYNC into an empty command queue;
 * poll for the completion of this CMD_SYNC by MSI or SEV.

I have seen the polling latency decrease by about 80ns: without this patch it
was about 270ns, and with this patch it is about 190ns.
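
For reference, a minimal sketch of such a measurement (not part of the patch; the
arm_smmu_time_one_cmd_sync() hook below is hypothetical, while arm_smmu_cmdq_issue_sync()
is the existing driver helper that posts a CMD_SYNC and polls for its completion by MSI or SEV):

#include <linux/ktime.h>

/*
 * Hypothetical instrumentation, not part of this patch: time a single
 * CMD_SYNC on an otherwise empty command queue, so the result is dominated
 * by the completion-polling path (MSI or SEV).
 */
static u64 arm_smmu_time_one_cmd_sync(struct arm_smmu_device *smmu)
{
	u64 t0 = ktime_get_ns();

	/* Posts a CMD_SYNC and waits for it to complete */
	arm_smmu_cmdq_issue_sync(smmu);

	return ktime_get_ns() - t0;
}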

> 
> Thanks,
> John
> 
> > This patch provides a command line option so that users can decide to
> > use MSI polling or not based on their tests.
> >
> > Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
> > ---
> >   -v3:
> >    * rebase on top of linux-next as arm-smmu-v3.c has moved;
> >    * provide a command line option
> >
> >   drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 18
> ++++++++++++++----
> >   1 file changed, 14 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > index 7196207be7ea..89d3cb391fef 100644
> > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > @@ -418,6 +418,11 @@ module_param_named(disable_bypass,
> disable_bypass, bool, S_IRUGO);
> >   MODULE_PARM_DESC(disable_bypass,
> >   	"Disable bypass streams such that incoming transactions from devices
> that are not attached to an iommu domain will report an abort back to the
> device and will not be allowed to pass through the SMMU.");
> >
> > +static bool disable_msipolling;
> > +module_param_named(disable_msipolling, disable_msipolling, bool,
> S_IRUGO);
> > +MODULE_PARM_DESC(disable_msipolling,
> > +	"Disable MSI-based polling for CMD_SYNC completion.");
> > +
> >   enum pri_resp {
> >   	PRI_RESP_DENY = 0,
> >   	PRI_RESP_FAIL = 1,
> > @@ -980,6 +985,13 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd,
> struct arm_smmu_cmdq_ent *ent)
> >   	return 0;
> >   }
> >
> > +static bool arm_smmu_use_msipolling(struct arm_smmu_device *smmu)
> > +{
> > +	return !disable_msipolling &&
> > +	       smmu->features & ARM_SMMU_FEAT_COHERENCY &&
> > +	       smmu->features & ARM_SMMU_FEAT_MSI;
> > +}
> > +
> >   static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct
> arm_smmu_device *smmu,
> >   					 u32 prod)
> >   {
> > @@ -992,8 +1004,7 @@ static void arm_smmu_cmdq_build_sync_cmd(u64
> *cmd, struct arm_smmu_device *smmu,
> >   	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
> >   	 * payload, so the write will zero the entire command on that platform.
> >   	 */
> > -	if (smmu->features & ARM_SMMU_FEAT_MSI &&
> > -	    smmu->features & ARM_SMMU_FEAT_COHERENCY) {
> > +	if (arm_smmu_use_msipolling(smmu)) {
> >   		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
> >   				   q->ent_dwords * 8;
> >   	}
> > @@ -1332,8 +1343,7 @@ static int
> __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
> >   static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device
> *smmu,
> >   					 struct arm_smmu_ll_queue *llq)
> >   {
> > -	if (smmu->features & ARM_SMMU_FEAT_MSI &&
> > -	    smmu->features & ARM_SMMU_FEAT_COHERENCY)
> > +	if (arm_smmu_use_msipolling(smmu))
> >   		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
> >
> >   	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
> >

Thanks
Barry

Patch

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 7196207be7ea..89d3cb391fef 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -418,6 +418,11 @@  module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
 MODULE_PARM_DESC(disable_bypass,
 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
 
+static bool disable_msipolling;
+module_param_named(disable_msipolling, disable_msipolling, bool, S_IRUGO);
+MODULE_PARM_DESC(disable_msipolling,
+	"Disable MSI-based polling for CMD_SYNC completion.");
+
 enum pri_resp {
 	PRI_RESP_DENY = 0,
 	PRI_RESP_FAIL = 1,
@@ -980,6 +985,13 @@  static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
 	return 0;
 }
 
+static bool arm_smmu_use_msipolling(struct arm_smmu_device *smmu)
+{
+	return !disable_msipolling &&
+	       smmu->features & ARM_SMMU_FEAT_COHERENCY &&
+	       smmu->features & ARM_SMMU_FEAT_MSI;
+}
+
 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
 					 u32 prod)
 {
@@ -992,8 +1004,7 @@  static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
 	 * payload, so the write will zero the entire command on that platform.
 	 */
-	if (smmu->features & ARM_SMMU_FEAT_MSI &&
-	    smmu->features & ARM_SMMU_FEAT_COHERENCY) {
+	if (arm_smmu_use_msipolling(smmu)) {
 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
 				   q->ent_dwords * 8;
 	}
@@ -1332,8 +1343,7 @@  static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
 					 struct arm_smmu_ll_queue *llq)
 {
-	if (smmu->features & ARM_SMMU_FEAT_MSI &&
-	    smmu->features & ARM_SMMU_FEAT_COHERENCY)
+	if (arm_smmu_use_msipolling(smmu))
 		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
 
 	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);