diff mbox series

[v2] iommu/arm-smmu-v3: disable MSI polling if SEV polling is faster

Message ID 20200731083343.18152-1-song.bao.hua@hisilicon.com (mailing list archive)
State New, archived
Headers show
Series [v2] iommu/arm-smmu-v3: disable MSI polling if SEV polling is faster | expand

Commit Message

Song Bao Hua (Barry Song) July 31, 2020, 8:33 a.m. UTC
Different implementations may show different performance by using SEV
polling or MSI polling.
On the implementation of hi1620, tests show disabling MSI polling can
bring performance improvement.
Using 16 threads to run netperf on hns3 100G NIC with UDP packet size
of 32768 bytes and iommu set to strict, TX throughput can improve from
25Gbps to 27Gbps by this patch.
This patch adds a generic function to support implementation options
based on IIDR and disables MSI polling if IIDR matches the specific
implementation tested.

Cc: Prime Zeng <prime.zeng@hisilicon.com>
Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
---
 -v2: rather than disabling msipolling globally, only disable it for
 specific implementation based on IIDR

 drivers/iommu/arm-smmu-v3.c | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

Comments

John Garry July 31, 2020, 10:21 a.m. UTC | #1
On 31/07/2020 09:33, Barry Song wrote:
> Different implementations may show different performance by using SEV
> polling or MSI polling.
> On the implementation of hi1620, tests show disabling MSI polling can
> bring performance improvement.
> Using 16 threads to run netperf on hns3 100G NIC with UDP packet size
> in 32768bytes and set iommu to strict, TX throughput can improve from
> 25Gbps to 27Gbps by this patch.
> This patch adds a generic function to support implementation options
> based on IIDR and disables MSI polling if IIDR matches the specific
> implementation tested.
Not sure if we should do checks like this on an implementation basis. 
I'm sure maintainers will decide.

> 
> Cc: Prime Zeng <prime.zeng@hisilicon.com>
> Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
> ---
>   -v2: rather than disabling msipolling globally, only disable it for
>   specific implementation based on IIDR
> 
>   drivers/iommu/arm-smmu-v3.c | 31 +++++++++++++++++++++++++++++--

this file has moved, check linux-next

>   1 file changed, 29 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index f578677a5c41..ed5a6774eb45 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -88,6 +88,12 @@
>   #define IDR5_VAX			GENMASK(11, 10)
>   #define IDR5_VAX_52_BIT			1
>   
> +#define ARM_SMMU_IIDR			0x18
> +#define IIDR_VARIANT			GENMASK(19, 16)
> +#define IIDR_REVISION			GENMASK(15, 12)
> +#define IIDR_IMPLEMENTER		GENMASK(11, 0)
> +#define IMPLEMENTER_HISILICON		0x736
> +
>   #define ARM_SMMU_CR0			0x20
>   #define CR0_ATSCHK			(1 << 4)
>   #define CR0_CMDQEN			(1 << 3)
> @@ -652,6 +658,7 @@ struct arm_smmu_device {
>   
>   #define ARM_SMMU_OPT_SKIP_PREFETCH	(1 << 0)
>   #define ARM_SMMU_OPT_PAGE0_REGS_ONLY	(1 << 1)
> +#define ARM_SMMU_OPT_DISABLE_MSIPOLL    (1 << 2)
>   	u32				options;
>   
>   	struct arm_smmu_cmdq		cmdq;
> @@ -992,7 +999,8 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
>   	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
>   	 * payload, so the write will zero the entire command on that platform.
>   	 */
> -	if (smmu->features & ARM_SMMU_FEAT_MSI &&
> +	if (!(smmu->options & ARM_SMMU_OPT_DISABLE_MSIPOLL) &&
> +	    smmu->features & ARM_SMMU_FEAT_MSI &&

I don't know why you check MSIPOLL disabled and then MSI poll supported. 
Surely for native non-MSI poll (like hi1616), the ARM_SMMU_FEAT_MSI 
check first makes sense. This is fastpath, albeit fast to maybe wait..

>   	    smmu->features & ARM_SMMU_FEAT_COHERENCY) {
>   		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
>   				   q->ent_dwords * 8;
> @@ -1332,7 +1340,8 @@ static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
>   static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
>   					 struct arm_smmu_ll_queue *llq)
>   {
> -	if (smmu->features & ARM_SMMU_FEAT_MSI &&
> +	if (!(smmu->options & ARM_SMMU_OPT_DISABLE_MSIPOLL) &&
> +	    smmu->features & ARM_SMMU_FEAT_MSI &&
>   	    smmu->features & ARM_SMMU_FEAT_COHERENCY)
>   		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
>   
> @@ -3693,6 +3702,21 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
>   	return 0;
>   }
>   
> +static void acpi_smmu_get_implementation_options(struct arm_smmu_device *smmu)
> +{
> +	/*
> +	 * IIDR provides information about the implementation and implementer of
> +	 * the SMMU
> +	 */
> +	u32 iidr = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
> +	u32 implementer = FIELD_GET(IIDR_IMPLEMENTER, iidr);
> +	u32 variant = FIELD_GET(IIDR_VARIANT, iidr);
> +	u32 revision = FIELD_GET(IIDR_REVISION, iidr);

why not check the product ID also, i.e. the complete register contents?

> +
> +	if (implementer == IMPLEMENTER_HISILICON && variant == 3 && revision == 0)
> +		smmu->options |= ARM_SMMU_OPT_DISABLE_MSIPOLL;
> +}
> +
>   static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
>   {
>   	u32 reg;
> @@ -3892,6 +3916,9 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
>   
>   	smmu->ias = max(smmu->ias, smmu->oas);
>   
> +	/* set implementation-related options according to IIDR */
> +	acpi_smmu_get_implementation_options(smmu);
> +
>   	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
>   		 smmu->ias, smmu->oas, smmu->features);
>   	return 0;
>
Song Bao Hua (Barry Song) July 31, 2020, 10:48 a.m. UTC | #2
> -----Original Message-----
> From: John Garry
> Sent: Friday, July 31, 2020 10:21 PM
> To: Song Bao Hua (Barry Song) <song.bao.hua@hisilicon.com>; will@kernel.org;
> robin.murphy@arm.com; joro@8bytes.org; iommu@lists.linux-foundation.org
> Cc: Zengtao (B) <prime.zeng@hisilicon.com>; Linuxarm
> <linuxarm@huawei.com>; linux-arm-kernel@lists.infradead.org
> Subject: Re: [PATCH v2] iommu/arm-smmu-v3: disable MSI polling if SEV
> polling is faster
> 
> On 31/07/2020 09:33, Barry Song wrote:
> > Different implementations may show different performance by using SEV
> > polling or MSI polling.
> > On the implementation of hi1620, tests show disabling MSI polling can
> > bring performance improvement.
> > Using 16 threads to run netperf on hns3 100G NIC with UDP packet size
> > in 32768bytes and set iommu to strict, TX throughput can improve from
> > 25Gbps to 27Gbps by this patch.
> > This patch adds a generic function to support implementation options
> > based on IIDR and disables MSI polling if IIDR matches the specific
> > implementation tested.
> Not sure if we should do checks like this on an implementation basis.
> I'm sure maintainers will decide.

Yes, maintainers will decide. I guess Will won't object to IIDR-based solution according to
previous discussion threads:
https://lore.kernel.org/patchwork/patch/783718/

Am I right, Will?

> 
> >
> > Cc: Prime Zeng <prime.zeng@hisilicon.com>
> > Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
> > ---
> >   -v2: rather than disabling msipolling globally, only disable it for
> >   specific implementation based on IIDR
> >
> >   drivers/iommu/arm-smmu-v3.c | 31 +++++++++++++++++++++++++++++--
> 
> this file has moved, check linux-next

Thanks for reminding. Hopefully Will or Robin can give some feedback so that the v3 can come to fix this
as well as other issues they might point out.

> 
> >   1 file changed, 29 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> > index f578677a5c41..ed5a6774eb45 100644
> > --- a/drivers/iommu/arm-smmu-v3.c
> > +++ b/drivers/iommu/arm-smmu-v3.c
> > @@ -88,6 +88,12 @@
> >   #define IDR5_VAX			GENMASK(11, 10)
> >   #define IDR5_VAX_52_BIT			1
> >
> > +#define ARM_SMMU_IIDR			0x18
> > +#define IIDR_VARIANT			GENMASK(19, 16)
> > +#define IIDR_REVISION			GENMASK(15, 12)
> > +#define IIDR_IMPLEMENTER		GENMASK(11, 0)
> > +#define IMPLEMENTER_HISILICON		0x736
> > +
> >   #define ARM_SMMU_CR0			0x20
> >   #define CR0_ATSCHK			(1 << 4)
> >   #define CR0_CMDQEN			(1 << 3)
> > @@ -652,6 +658,7 @@ struct arm_smmu_device {
> >
> >   #define ARM_SMMU_OPT_SKIP_PREFETCH	(1 << 0)
> >   #define ARM_SMMU_OPT_PAGE0_REGS_ONLY	(1 << 1)
> > +#define ARM_SMMU_OPT_DISABLE_MSIPOLL    (1 << 2)
> >   	u32				options;
> >
> >   	struct arm_smmu_cmdq		cmdq;
> > @@ -992,7 +999,8 @@ static void arm_smmu_cmdq_build_sync_cmd(u64
> *cmd, struct arm_smmu_device *smmu,
> >   	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
> >   	 * payload, so the write will zero the entire command on that platform.
> >   	 */
> > -	if (smmu->features & ARM_SMMU_FEAT_MSI &&
> > +	if (!(smmu->options & ARM_SMMU_OPT_DISABLE_MSIPOLL) &&
> > +	    smmu->features & ARM_SMMU_FEAT_MSI &&
> 
> I don't know why you check MSIPOLL disabled and then MSI poll supported.
> Surely for native non-MSI poll (like hi1616), the ARM_SMMU_FEAT_MSI
> check first makes sense. This is fastpath, albeit fast to maybe wait..

I was thinking !(smmu->options & ARM_SMMU_OPT_DISABLE_MSIPOLL) is the fast path
for 1620 as it can jump out quickly.

But yes, you are right since there are many other SMMU not only 1620.


> 
> >   	    smmu->features & ARM_SMMU_FEAT_COHERENCY) {
> >   		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
> >   				   q->ent_dwords * 8;
> > @@ -1332,7 +1340,8 @@ static int
> __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
> >   static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device
> *smmu,
> >   					 struct arm_smmu_ll_queue *llq)
> >   {
> > -	if (smmu->features & ARM_SMMU_FEAT_MSI &&
> > +	if (!(smmu->options & ARM_SMMU_OPT_DISABLE_MSIPOLL) &&
> > +	    smmu->features & ARM_SMMU_FEAT_MSI &&
> >   	    smmu->features & ARM_SMMU_FEAT_COHERENCY)
> >   		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
> >
> > @@ -3693,6 +3702,21 @@ static int arm_smmu_device_reset(struct
> arm_smmu_device *smmu, bool bypass)
> >   	return 0;
> >   }
> >
> > +static void acpi_smmu_get_implementation_options(struct
> arm_smmu_device *smmu)
> > +{
> > +	/*
> > +	 * IIDR provides information about the implementation and implementer
> of
> > +	 * the SMMU
> > +	 */
> > +	u32 iidr = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
> > +	u32 implementer = FIELD_GET(IIDR_IMPLEMENTER, iidr);
> > +	u32 variant = FIELD_GET(IIDR_VARIANT, iidr);
> > +	u32 revision = FIELD_GET(IIDR_REVISION, iidr);
> 
> why not check the product ID also, i.e. the complete register contents?

Ideally, we can use variant and revision to differentiate all 1616, 1620, 1630 and so on.
All of them should get different values for the combination of variant and revision.
However, I will think more about other fields as you are suggesting.

> 
> > +
> > +	if (implementer == IMPLEMENTER_HISILICON && variant == 3 && revision
> == 0)
> > +		smmu->options |= ARM_SMMU_OPT_DISABLE_MSIPOLL;
> > +}
> > +
> >   static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
> >   {
> >   	u32 reg;
> > @@ -3892,6 +3916,9 @@ static int arm_smmu_device_hw_probe(struct
> arm_smmu_device *smmu)
> >
> >   	smmu->ias = max(smmu->ias, smmu->oas);
> >
> > +	/* set implementation-related options according to IIDR */
> > +	acpi_smmu_get_implementation_options(smmu);
> > +
> >   	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
> >   		 smmu->ias, smmu->oas, smmu->features);
> >   	return 0;
> >

Thanks
Barry
Will Deacon July 31, 2020, 12:21 p.m. UTC | #3
On Fri, Jul 31, 2020 at 10:48:33AM +0000, Song Bao Hua (Barry Song) wrote:
> > -----Original Message-----
> > From: John Garry
> > Sent: Friday, July 31, 2020 10:21 PM
> > To: Song Bao Hua (Barry Song) <song.bao.hua@hisilicon.com>; will@kernel.org;
> > robin.murphy@arm.com; joro@8bytes.org; iommu@lists.linux-foundation.org
> > Cc: Zengtao (B) <prime.zeng@hisilicon.com>; Linuxarm
> > <linuxarm@huawei.com>; linux-arm-kernel@lists.infradead.org
> > Subject: Re: [PATCH v2] iommu/arm-smmu-v3: disable MSI polling if SEV
> > polling is faster
> > 
> > On 31/07/2020 09:33, Barry Song wrote:
> > > Different implementations may show different performance by using SEV
> > > polling or MSI polling.
> > > On the implementation of hi1620, tests show disabling MSI polling can
> > > bring performance improvement.
> > > Using 16 threads to run netperf on hns3 100G NIC with UDP packet size
> > > in 32768bytes and set iommu to strict, TX throughput can improve from
> > > 25Gbps to 27Gbps by this patch.
> > > This patch adds a generic function to support implementation options
> > > based on IIDR and disables MSI polling if IIDR matches the specific
> > > implementation tested.
> > Not sure if we should do checks like this on an implementation basis.
> > I'm sure maintainers will decide.
> 
> Yes, maintainers will decide. I guess Will won't object to IIDR-based solution according to
> previous discussion threads:
> https://lore.kernel.org/patchwork/patch/783718/
> 
> Am I right, Will?

Honestly, I object to the whole idea that we should turn off optional
hardware features just because they're slow. Did nobody take time to look at
the design and check that it offered some benefit, or were they in too much
of a hurry to tick the checkbox to say they had the new feature? I really
dislike the pick and mix nature that some of this IP is heading in, where
the marketing folks want a slice of everything for the branding, instead of
doing a few useful things well. Anyway, that's not your fault, so I'll stop
moaning. *sigh*

Given that you've baked this thing now, then if we have to support it I
would prefer the command-line option. At least that means that people can
compare the performance with it on and off (and hopefully make sure the
hardware doesn't suck). It also means it's not specific to ACPI.

Will
Song Bao Hua (Barry Song) July 31, 2020, 12:30 p.m. UTC | #4
> -----Original Message-----
> From: Will Deacon [mailto:will@kernel.org]
> Sent: Saturday, August 1, 2020 12:22 AM
> To: Song Bao Hua (Barry Song) <song.bao.hua@hisilicon.com>
> Cc: John Garry <john.garry@huawei.com>; robin.murphy@arm.com;
> joro@8bytes.org; iommu@lists.linux-foundation.org; Zengtao (B)
> <prime.zeng@hisilicon.com>; Linuxarm <linuxarm@huawei.com>;
> linux-arm-kernel@lists.infradead.org
> Subject: Re: [PATCH v2] iommu/arm-smmu-v3: disable MSI polling if SEV
> polling is faster
> 
> On Fri, Jul 31, 2020 at 10:48:33AM +0000, Song Bao Hua (Barry Song) wrote:
> > > -----Original Message-----
> > > From: John Garry
> > > Sent: Friday, July 31, 2020 10:21 PM
> > > To: Song Bao Hua (Barry Song) <song.bao.hua@hisilicon.com>;
> will@kernel.org;
> > > robin.murphy@arm.com; joro@8bytes.org;
> iommu@lists.linux-foundation.org
> > > Cc: Zengtao (B) <prime.zeng@hisilicon.com>; Linuxarm
> > > <linuxarm@huawei.com>; linux-arm-kernel@lists.infradead.org
> > > Subject: Re: [PATCH v2] iommu/arm-smmu-v3: disable MSI polling if SEV
> > > polling is faster
> > >
> > > On 31/07/2020 09:33, Barry Song wrote:
> > > > Different implementations may show different performance by using SEV
> > > > polling or MSI polling.
> > > > On the implementation of hi1620, tests show disabling MSI polling can
> > > > bring performance improvement.
> > > > Using 16 threads to run netperf on hns3 100G NIC with UDP packet size
> > > > in 32768bytes and set iommu to strict, TX throughput can improve from
> > > > 25Gbps to 27Gbps by this patch.
> > > > This patch adds a generic function to support implementation options
> > > > based on IIDR and disables MSI polling if IIDR matches the specific
> > > > implementation tested.
> > > Not sure if we should do checks like this on an implementation basis.
> > > I'm sure maintainers will decide.
> >
> > Yes, maintainers will decide. I guess Will won't object to IIDR-based solution
> according to
> > previous discussion threads:
> > https://lore.kernel.org/patchwork/patch/783718/
> >
> > Am I right, Will?
> 
> Honestly, I object to the whole idea that we should turn off optional
> hardware features just because they're slow. Did nobody take time to look at
> the design and check that it offered some benefit, or where they in too much
> of a hurry to tick the checkbox to say they had the new feature? I really
> dislike the pick and mix nature that some of this IP is heading in, where
> the marketing folks want a slice of everything for the branding, instead of
> doing a few useful things well. Anyway, that's not your fault, so I'll stop
> moaning. *sigh*
> 
> Given that you've baked this thing now, then if we have to support it I
> would prefer the command-line option. At least that means that people can
> compare the performance with it on and off (and hopefully make sure the
> hardware doesn't suck). It also means it's not specific to ACPI.

Hi Will,
Thanks for your comment. I had a patch with command line option as below.
If it is what you prefer, I'd refine this one and send.

[PATCH] iommu/arm-smmu-v3: permit users to disable msi polling
---
 drivers/iommu/arm-smmu-v3.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index f578677a5c41..4fb1681308e4 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -418,6 +418,11 @@ module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
 MODULE_PARM_DESC(disable_bypass,
 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
 
+static bool disable_msipolling = 1;
+module_param_named(disable_msipolling, disable_msipolling, bool, S_IRUGO);
+MODULE_PARM_DESC(disable_msipolling,
+	"Don't use MSI to poll the completion of CMD_SYNC if it is slower than SEV");
+
 enum pri_resp {
 	PRI_RESP_DENY = 0,
 	PRI_RESP_FAIL = 1,
@@ -992,7 +997,7 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
 	 * payload, so the write will zero the entire command on that platform.
 	 */
-	if (smmu->features & ARM_SMMU_FEAT_MSI &&
+	if (!disable_msipolling && smmu->features & ARM_SMMU_FEAT_MSI &&
 	    smmu->features & ARM_SMMU_FEAT_COHERENCY) {
 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
 				   q->ent_dwords * 8;
@@ -1332,7 +1337,7 @@ static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
 					 struct arm_smmu_ll_queue *llq)
 {
-	if (smmu->features & ARM_SMMU_FEAT_MSI &&
+	if (!disable_msipolling && smmu->features & ARM_SMMU_FEAT_MSI &&
 	    smmu->features & ARM_SMMU_FEAT_COHERENCY)
 		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
Will Deacon July 31, 2020, 12:43 p.m. UTC | #5
On Fri, Jul 31, 2020 at 12:30:27PM +0000, Song Bao Hua (Barry Song) wrote:
> Thanks for your comment. I had a patch with command line option as below.
> If it is what you prefer, I'd refine this one and send.
> 
> [PATCH] iommu/arm-smmu-v3: permit users to disable msi polling
> ---
>  drivers/iommu/arm-smmu-v3.c | 9 +++++++--
>  1 file changed, 7 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index f578677a5c41..4fb1681308e4 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -418,6 +418,11 @@ module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
>  MODULE_PARM_DESC(disable_bypass,
>  	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
>  
> +static bool disable_msipolling = 1;
> +module_param_named(disable_msipolling, disable_msipolling, bool, S_IRUGO);
> +MODULE_PARM_DESC(disable_msipolling,
> +	"Don't use MSI to poll the completion of CMD_SYNC if it is slower than SEV");

Reword this to "Disable MSI-based polling for CMD_SYNC completion."

> +
>  enum pri_resp {
>  	PRI_RESP_DENY = 0,
>  	PRI_RESP_FAIL = 1,
> @@ -992,7 +997,7 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
>  	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
>  	 * payload, so the write will zero the entire command on that platform.
>  	 */
> -	if (smmu->features & ARM_SMMU_FEAT_MSI &&
> +	if (!disable_msipolling && smmu->features & ARM_SMMU_FEAT_MSI &&
>  	    smmu->features & ARM_SMMU_FEAT_COHERENCY) {

Probably now cleaner to wrap this up into a helper:

static bool arm_smmu_use_msipolling(struct arm_smmu_device *smmu)
{
	return !disable_msipolling &&
	       smmu->features & ARM_SMMU_FEAT_COHERENCY &&
	       smmu->features & ARM_SMMU_FEAT_MSI;
}

>  		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
>  				   q->ent_dwords * 8;
> @@ -1332,7 +1337,7 @@ static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
>  static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
>  					 struct arm_smmu_ll_queue *llq)
>  {
> -	if (smmu->features & ARM_SMMU_FEAT_MSI &&
> +	if (!disable_msipolling && smmu->features & ARM_SMMU_FEAT_MSI &&
>  	    smmu->features & ARM_SMMU_FEAT_COHERENCY)

Then you can use the new helper here too.

Will
diff mbox series

Patch

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index f578677a5c41..ed5a6774eb45 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -88,6 +88,12 @@ 
 #define IDR5_VAX			GENMASK(11, 10)
 #define IDR5_VAX_52_BIT			1
 
+#define ARM_SMMU_IIDR			0x18
+#define IIDR_VARIANT			GENMASK(19, 16)
+#define IIDR_REVISION			GENMASK(15, 12)
+#define IIDR_IMPLEMENTER		GENMASK(11, 0)
+#define IMPLEMENTER_HISILICON		0x736
+
 #define ARM_SMMU_CR0			0x20
 #define CR0_ATSCHK			(1 << 4)
 #define CR0_CMDQEN			(1 << 3)
@@ -652,6 +658,7 @@  struct arm_smmu_device {
 
 #define ARM_SMMU_OPT_SKIP_PREFETCH	(1 << 0)
 #define ARM_SMMU_OPT_PAGE0_REGS_ONLY	(1 << 1)
+#define ARM_SMMU_OPT_DISABLE_MSIPOLL    (1 << 2)
 	u32				options;
 
 	struct arm_smmu_cmdq		cmdq;
@@ -992,7 +999,8 @@  static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
 	 * payload, so the write will zero the entire command on that platform.
 	 */
-	if (smmu->features & ARM_SMMU_FEAT_MSI &&
+	if (!(smmu->options & ARM_SMMU_OPT_DISABLE_MSIPOLL) &&
+	    smmu->features & ARM_SMMU_FEAT_MSI &&
 	    smmu->features & ARM_SMMU_FEAT_COHERENCY) {
 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
 				   q->ent_dwords * 8;
@@ -1332,7 +1340,8 @@  static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
 					 struct arm_smmu_ll_queue *llq)
 {
-	if (smmu->features & ARM_SMMU_FEAT_MSI &&
+	if (!(smmu->options & ARM_SMMU_OPT_DISABLE_MSIPOLL) &&
+	    smmu->features & ARM_SMMU_FEAT_MSI &&
 	    smmu->features & ARM_SMMU_FEAT_COHERENCY)
 		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
 
@@ -3693,6 +3702,21 @@  static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
 	return 0;
 }
 
+static void acpi_smmu_get_implementation_options(struct arm_smmu_device *smmu)
+{
+	/*
+	 * IIDR provides information about the implementation and implementer of
+	 * the SMMU
+	 */
+	u32 iidr = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
+	u32 implementer = FIELD_GET(IIDR_IMPLEMENTER, iidr);
+	u32 variant = FIELD_GET(IIDR_VARIANT, iidr);
+	u32 revision = FIELD_GET(IIDR_REVISION, iidr);
+
+	if (implementer == IMPLEMENTER_HISILICON && variant == 3 && revision == 0)
+		smmu->options |= ARM_SMMU_OPT_DISABLE_MSIPOLL;
+}
+
 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 {
 	u32 reg;
@@ -3892,6 +3916,9 @@  static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 
 	smmu->ias = max(smmu->ias, smmu->oas);
 
+	/* set implementation-related options according to IIDR */
+	acpi_smmu_get_implementation_options(smmu);
+
 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
 		 smmu->ias, smmu->oas, smmu->features);
 	return 0;