diff mbox series

[RFC,v2,6/7] iommu/arm-smmu-v3: Use KVM VMID for s2 stage

Message ID 20240208151837.35068-7-shameerali.kolothum.thodi@huawei.com (mailing list archive)
State New, archived
Headers show
Series iommu/arm-smmu-v3: Use pinned KVM VMID for stage 2 | expand

Commit Message

Shameerali Kolothum Thodi Feb. 8, 2024, 3:18 p.m. UTC
If kvm is available make use of kvm pinned VMID interfaces to
set the s2 stage VMID for nested domains.

Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 18 +++++++++++++-----
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  2 ++
 2 files changed, 15 insertions(+), 5 deletions(-)

Comments

Jason Gunthorpe Feb. 8, 2024, 3:59 p.m. UTC | #1
On Thu, Feb 08, 2024 at 03:18:36PM +0000, Shameer Kolothum wrote:
> If kvm is available make use of kvm pinned VMID interfaces to
> set the s2 stage VMID for nested domains.
> 
> Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
> ---
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 18 +++++++++++++-----
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  2 ++
>  2 files changed, 15 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index b41d77787a2f..18e3e04b50f4 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -18,6 +18,7 @@
>  #include <linux/interrupt.h>
>  #include <linux/io-pgtable.h>
>  #include <linux/iopoll.h>
> +#include <linux/kvm_host.h>
>  #include <linux/module.h>
>  #include <linux/msi.h>
>  #include <linux/of.h>
> @@ -2399,9 +2400,13 @@ int arm_smmu_domain_alloc_id(struct arm_smmu_device *smmu,
>  	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2) {
>  		int vmid;
>  
> -		/* Reserve VMID 0 for stage-2 bypass STEs */
> -		vmid = ida_alloc_range(&smmu->vmid_map, 1,
> -				       (1 << smmu->vmid_bits) - 1, GFP_KERNEL);
> +		if (smmu_domain->kvm) {
> +			vmid = kvm_pinned_vmid_get(smmu_domain->kvm);
> +		} else {
> +			/* Reserve VMID 0 for stage-2 bypass STEs */
> +			vmid = ida_alloc_range(&smmu->vmid_map, 1,
> +					       (1 << smmu->vmid_bits) - 1, GFP_KERNEL);
> +		}

We cannot allow the two different STEs to be programmed with the same
VMID but different translations, so somehow the two allocators have to
work together.

This is why the SVA BTM code has that complex ASID reassignment logic
so it can get away with two allocators. However ASID also has SMMU HW
ASET support to opt-in to the BTM broadcast.

My suggestion is to avoid two allocators and make iommu instances that
support BTM always use the KVM owned VMID allocator by forbidding a S2
domain from being created with a NULL KVM.

IOW all the DMA API/etc will use S1 domains and the only way to get to
a S2 is to allocate a nesting parent via iommufd - for BTM systems
only.

Since the S2 can't opt-out of the BTM broadcast this means the VMIDs
are cleanly assigned and we never get an issue where the KVM TLBI's
are flushing an unrelated S2.

Jason
Shameerali Kolothum Thodi Feb. 8, 2024, 4:14 p.m. UTC | #2
> -----Original Message-----
> From: Jason Gunthorpe <jgg@ziepe.ca>
> Sent: Thursday, February 8, 2024 3:59 PM
> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>
> Cc: kvmarm@lists.linux.dev; iommu@lists.linux.dev; linux-arm-
> kernel@lists.infradead.org; Linuxarm <linuxarm@huawei.com>;
> kevin.tian@intel.com; alex.williamson@redhat.com; maz@kernel.org;
> oliver.upton@linux.dev; will@kernel.org; robin.murphy@arm.com; jean-
> philippe@linaro.org; Jonathan Cameron <jonathan.cameron@huawei.com>
> Subject: Re: [RFC PATCH v2 6/7] iommu/arm-smmu-v3: Use KVM VMID for s2
> stage
> 
> On Thu, Feb 08, 2024 at 03:18:36PM +0000, Shameer Kolothum wrote:
> > If kvm is available make use of kvm pinned VMID interfaces to
> > set the s2 stage VMID for nested domains.
> >
> > Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
> > ---
> >  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 18 +++++++++++++-----
> >  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  2 ++
> >  2 files changed, 15 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > index b41d77787a2f..18e3e04b50f4 100644
> > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > @@ -18,6 +18,7 @@
> >  #include <linux/interrupt.h>
> >  #include <linux/io-pgtable.h>
> >  #include <linux/iopoll.h>
> > +#include <linux/kvm_host.h>
> >  #include <linux/module.h>
> >  #include <linux/msi.h>
> >  #include <linux/of.h>
> > @@ -2399,9 +2400,13 @@ int arm_smmu_domain_alloc_id(struct
> arm_smmu_device *smmu,
> >  	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2) {
> >  		int vmid;
> >
> > -		/* Reserve VMID 0 for stage-2 bypass STEs */
> > -		vmid = ida_alloc_range(&smmu->vmid_map, 1,
> > -				       (1 << smmu->vmid_bits) - 1, GFP_KERNEL);
> > +		if (smmu_domain->kvm) {
> > +			vmid = kvm_pinned_vmid_get(smmu_domain->kvm);
> > +		} else {
> > +			/* Reserve VMID 0 for stage-2 bypass STEs */
> > +			vmid = ida_alloc_range(&smmu->vmid_map, 1,
> > +					       (1 << smmu->vmid_bits) - 1,
> GFP_KERNEL);
> > +		}
> 
> We cannot allow the two different STEs to be programmed with the same
> VMID but different translations, so somehow the two allocators have to
> work together.

My idea was, we only set smmu_domain->kvm in the nested parent case
and that means SMMUv3 supports both S1 & S2. So all the non-nested domains
are using S1.

> 
> This is why the SVA BTM code has that complex ASID reassignment logic
> so it can get away with two allocators. However ASID also has SMMU HW
> ASET support to opt-in to the BTM broadcast.
> 
> My suggestion is to avoid two allocators and make iommu instances that
> support BTM always use the KVM owned VMID allocator by forbidding a S2
> domain from being created with a NULL KVM.

This is what I was trying to do. But not sure we have systems out there where
only S2  is supported and that may require a private VMID without KVM.

> 
> IOW all the DMA API/etc will use S1 domains and the only way to get to
> a S2 is to allocate a nesting parent via iommufd - for BTM systems
> only.
> 
> Since the S2 can't opt-out of the BTM broadcast this means the VMIDs
> are cleanly assigned and we never get an issue where the KVM TLBI's
> are flushing an unrelated S2.

In the last patch we only enable BTM if only S1 is supported. Meaning we will
always have a KVM VMID associated for S2(if it supports that). 

Did I miss something here?

Thanks,
Shameer
Jason Gunthorpe Feb. 8, 2024, 4:26 p.m. UTC | #3
On Thu, Feb 08, 2024 at 04:14:07PM +0000, Shameerali Kolothum Thodi wrote:
> > >  #include <linux/of.h>
> > > @@ -2399,9 +2400,13 @@ int arm_smmu_domain_alloc_id(struct
> > arm_smmu_device *smmu,
> > >  	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2) {
> > >  		int vmid;
> > >
> > > -		/* Reserve VMID 0 for stage-2 bypass STEs */
> > > -		vmid = ida_alloc_range(&smmu->vmid_map, 1,
> > > -				       (1 << smmu->vmid_bits) - 1, GFP_KERNEL);
> > > +		if (smmu_domain->kvm) {
> > > +			vmid = kvm_pinned_vmid_get(smmu_domain->kvm);
> > > +		} else {
> > > +			/* Reserve VMID 0 for stage-2 bypass STEs */
> > > +			vmid = ida_alloc_range(&smmu->vmid_map, 1,
> > > +					       (1 << smmu->vmid_bits) - 1,
> > GFP_KERNEL);
> > > +		}
> > 
> > We cannot allow the two different STEs to be programmed with the same
> > VMID but different translations, so somehow the two allocators have to
> > work together.
> 
> My idea was, we only set smmu_domain->kvm in the nested parent case
> and that means SMMUv3 supports both S1 & S2. So all the non-nested domains
> are using S1.

Okay, that is what I was thinking to get to.

The logic needs to reflect this directly though:

/*
 * There can only be one allocator for VMIDs active at once. If BTM is
 * turned on then KVM's allocator always supplies the VMID, and the
 * VMID is matched by CPU invalidation of the KVM S2. Right now there
 * is no API to get an unused VMID from KVM so this also means BTM systems
 * cannot support S2 without an associated KVM.
 */
if ((smmu->feature & ARM_SMMU_FEAT_BTM)) {
   if (!smmu_domain->kvm)
      vmid = -EOPNOTSUPP;
   else
       vmid = kvm_pinned_vmid_get(smmu_domain->kvm);
} else {
	/* Reserve VMID 0 for stage-2 bypass STEs */
	vmid = ida_alloc_range(&smmu->vmid_map, 1,
		       (1 << smmu->vmid_bits) - 1, GFP_KERNEL);
}

(and matching change to the free side)

> This is what I was trying to do. But not sure we have systems out there where
> only S2  is supported and that may require a private VMID without KVM.

iommufd can ask for a nesting parent without supplying a KVM fd, which
would cause vmid aliasing between the two allocators. That needs to be
blocked as above

Jason
Shameerali Kolothum Thodi Feb. 8, 2024, 4:36 p.m. UTC | #4
> -----Original Message-----
> From: Jason Gunthorpe <jgg@ziepe.ca>
> Sent: Thursday, February 8, 2024 4:26 PM
> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>
> Cc: kvmarm@lists.linux.dev; iommu@lists.linux.dev; linux-arm-
> kernel@lists.infradead.org; Linuxarm <linuxarm@huawei.com>;
> kevin.tian@intel.com; alex.williamson@redhat.com; maz@kernel.org;
> oliver.upton@linux.dev; will@kernel.org; robin.murphy@arm.com; jean-
> philippe@linaro.org; Jonathan Cameron <jonathan.cameron@huawei.com>
> Subject: Re: [RFC PATCH v2 6/7] iommu/arm-smmu-v3: Use KVM VMID for s2
> stage
> 
> On Thu, Feb 08, 2024 at 04:14:07PM +0000, Shameerali Kolothum Thodi wrote:
> > > >  #include <linux/of.h>
> > > > @@ -2399,9 +2400,13 @@ int arm_smmu_domain_alloc_id(struct
> > > arm_smmu_device *smmu,
> > > >  	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2) {
> > > >  		int vmid;
> > > >
> > > > -		/* Reserve VMID 0 for stage-2 bypass STEs */
> > > > -		vmid = ida_alloc_range(&smmu->vmid_map, 1,
> > > > -				       (1 << smmu->vmid_bits) - 1, GFP_KERNEL);
> > > > +		if (smmu_domain->kvm) {
> > > > +			vmid = kvm_pinned_vmid_get(smmu_domain->kvm);
> > > > +		} else {
> > > > +			/* Reserve VMID 0 for stage-2 bypass STEs */
> > > > +			vmid = ida_alloc_range(&smmu->vmid_map, 1,
> > > > +					       (1 << smmu->vmid_bits) - 1,
> > > GFP_KERNEL);
> > > > +		}
> > >
> > > We cannot allow the two different STEs to be programmed with the same
> > > VMID but different translations, so somehow the two allocators have to
> > > work together.
> >
> > My idea was, we only set smmu_domain->kvm in the nested parent case
> > and that means SMMUv3 supports both S1 & S2. So all the non-nested
> domains
> > are using S1.
> 
> Okay, that is what I was thinking to get to.
> 
> The logic needs to reflect this directly though:
> 
> /*
>  * There can only be one allocator for VMIDs active at once. If BTM is
>  * turned on then KVM's allocator always supplies the VMID, and the
>  * VMID is matched by CPU invalidation of the KVM S2. Right now there
>  * is no API to get an unused VMID from KVM so this also means BTM systems
>  * cannot support S2 without an associated KVM.
>  */
> if ((smmu->feature & ARM_SMMU_FEAT_BTM)) {
>    if (!smmu_domain->kvm)
>       vmid = -EOPNOTSUPP;
>    else
>        vmid = kvm_pinned_vmid_get(smmu_domain->kvm);
> } else {
> 	/* Reserve VMID 0 for stage-2 bypass STEs */
> 	vmid = ida_alloc_range(&smmu->vmid_map, 1,
> 		       (1 << smmu->vmid_bits) - 1, GFP_KERNEL);
> }
> 
> (and matching change to the free side)
> 
> > This is what I was trying to do. But not sure we have systems out there where
> > only S2  is supported and that may require a private VMID without KVM.
> 
> iommufd can ask for a nesting parent without supplying a KVM fd, which
> would cause vmid aliasing between the two allocators. That needs to be
> blocked as above

Ah..That's right. Missed that case where KVM can be NULL, but still there is a 
request for nested domain.

Thanks,
Shameer
diff mbox series

Patch

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index b41d77787a2f..18e3e04b50f4 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -18,6 +18,7 @@ 
 #include <linux/interrupt.h>
 #include <linux/io-pgtable.h>
 #include <linux/iopoll.h>
+#include <linux/kvm_host.h>
 #include <linux/module.h>
 #include <linux/msi.h>
 #include <linux/of.h>
@@ -2399,9 +2400,13 @@  int arm_smmu_domain_alloc_id(struct arm_smmu_device *smmu,
 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2) {
 		int vmid;
 
-		/* Reserve VMID 0 for stage-2 bypass STEs */
-		vmid = ida_alloc_range(&smmu->vmid_map, 1,
-				       (1 << smmu->vmid_bits) - 1, GFP_KERNEL);
+		if (smmu_domain->kvm) {
+			vmid = kvm_pinned_vmid_get(smmu_domain->kvm);
+		} else {
+			/* Reserve VMID 0 for stage-2 bypass STEs */
+			vmid = ida_alloc_range(&smmu->vmid_map, 1,
+					       (1 << smmu->vmid_bits) - 1, GFP_KERNEL);
+		}
 		if (vmid < 0)
 			return vmid;
 		smmu_domain->vmid = vmid;
@@ -2431,7 +2436,10 @@  void arm_smmu_domain_free_id(struct arm_smmu_domain *smmu_domain)
 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2 &&
 		   smmu_domain->vmid) {
 		arm_smmu_tlb_inv_all_s2(smmu_domain);
-		ida_free(&smmu->vmid_map, smmu_domain->vmid);
+		if (smmu_domain->kvm)
+			kvm_pinned_vmid_put(smmu_domain->kvm);
+		else
+			ida_free(&smmu->vmid_map, smmu_domain->vmid);
 	}
 }
 
@@ -3217,7 +3225,7 @@  arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
 			goto err_free;
 		}
 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
-		smmu_domain->nesting_parent = true;
+		smmu_domain->kvm = kvm;
 	}
 
 	smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED;
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 45bcd72fcda4..7d732ea2a6ee 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -758,6 +758,8 @@  struct arm_smmu_domain {
 	struct mmu_notifier		mmu_notifier;
 	bool				btm_invalidation : 1;
 	bool				nesting_parent : 1;
+
+	struct kvm			*kvm;
 };
 
 struct arm_smmu_nested_domain {