diff mbox series

[RFC,14/21] RFC: iommu/iommufd/amd: Add IOMMU_HWPT_TRUSTED flag, tweak DTE's DomainID, IOTLB

Message ID 20240823132137.336874-15-aik@amd.com (mailing list archive)
State Handled Elsewhere
Headers show
Series Secure VFIO, TDISP, SEV TIO | expand

Commit Message

Alexey Kardashevskiy Aug. 23, 2024, 1:21 p.m. UTC
AMD IOMMUs use a device table where one entry (DTE) describes IOMMU
setup per a PCI BDFn. DMA accesses via these DTEs are always
unencrypted.

In order to allow DMA to/from private memory, AMD IOMMUs use another
memory structure called "secure device table" which entries (sDTEs)
are similar to DTE and contain configuration for private DMA operations.
The sDTE table is in the private memory and is managed by the PSP on
behalf of a SNP VM. So the host OS does not have access to it and
does not need to manage it.

However if sDTE is enabled, some fields of a DTE are now marked as
reserved in a DTE and managed by an sDTE instead (such as DomainID),
other fields need to stay in sync (IR/IW).

Mark IOMMU HW page table with a flag saying that the memory is
backed by KVM (effectively MEMFD).

Skip setting the DomainID in DTE. Enable IOTLB enable (bit 96) to
match what the PSP writes to sDTE.

Signed-off-by: Alexey Kardashevskiy <aik@amd.com>
---
 drivers/iommu/amd/amd_iommu_types.h  |  2 ++
 include/uapi/linux/iommufd.h         |  1 +
 drivers/iommu/amd/iommu.c            | 20 ++++++++++++++++++--
 drivers/iommu/iommufd/hw_pagetable.c |  4 ++++
 4 files changed, 25 insertions(+), 2 deletions(-)

Comments

Jason Gunthorpe Aug. 27, 2024, 12:17 p.m. UTC | #1
On Fri, Aug 23, 2024 at 11:21:28PM +1000, Alexey Kardashevskiy wrote:
> AMD IOMMUs use a device table where one entry (DTE) describes IOMMU
> setup per a PCI BDFn. DMA accesses via these DTEs are always
> unencrypted.
> 
> In order to allow DMA to/from private memory, AMD IOMMUs use another
> memory structure called "secure device table" which entries (sDTEs)
> are similar to DTE and contain configuration for private DMA operations.
> The sDTE table is in the private memory and is managed by the PSP on
> behalf of a SNP VM. So the host OS does not have access to it and
> does not need to manage it.
> 
> However if sDTE is enabled, some fields of a DTE are now marked as
> reserved in a DTE and managed by an sDTE instead (such as DomainID),
> other fields need to stay in sync (IR/IW).
> 
> Mark IOMMU HW page table with a flag saying that the memory is
> backed by KVM (effectively MEMFD).
> 
> Skip setting the DomainID in DTE. Enable IOTLB enable (bit 96) to
> match what the PSP writes to sDTE.
> 
> Signed-off-by: Alexey Kardashevskiy <aik@amd.com>
> ---
>  drivers/iommu/amd/amd_iommu_types.h  |  2 ++
>  include/uapi/linux/iommufd.h         |  1 +
>  drivers/iommu/amd/iommu.c            | 20 ++++++++++++++++++--
>  drivers/iommu/iommufd/hw_pagetable.c |  4 ++++
>  4 files changed, 25 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
> index 2b76b5dedc1d..cf435c1f2839 100644
> --- a/drivers/iommu/amd/amd_iommu_types.h
> +++ b/drivers/iommu/amd/amd_iommu_types.h
> @@ -588,6 +588,8 @@ struct protection_domain {
>  
>  	struct mmu_notifier mn;	/* mmu notifier for the SVA domain */
>  	struct list_head dev_data_list; /* List of pdom_dev_data */
> +
> +	u32 flags;
>  };
>  
>  /*
> diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
> index 4dde745cfb7e..c5536686b0b1 100644
> --- a/include/uapi/linux/iommufd.h
> +++ b/include/uapi/linux/iommufd.h
> @@ -364,6 +364,7 @@ enum iommufd_hwpt_alloc_flags {
>  	IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0,
>  	IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1,
>  	IOMMU_HWPT_FAULT_ID_VALID = 1 << 2,
> +	IOMMU_HWPT_TRUSTED = 1 << 3,
>  };

This looks so extremely specialized to AMD I think you should put this
in an AMD specific struct.

> diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
> index b19e8c0f48fa..e2f8fb79ee53 100644
> --- a/drivers/iommu/amd/iommu.c
> +++ b/drivers/iommu/amd/iommu.c
> @@ -1930,7 +1930,20 @@ static void set_dte_entry(struct amd_iommu *iommu,
>  	}
>  
>  	flags &= ~DEV_DOMID_MASK;
> -	flags |= domid;
> +
> +	if (dev_data->dev->tdi_enabled && (domain->flags & IOMMU_HWPT_TRUSTED)) {
> +		/*
> +		 * Do hack for VFIO with TSM enabled.
> +		 * This runs when VFIO is being bound to a device and before TDI is bound.
> +		 * Ideally TSM should change DTE only when TDI is bound.
> +		 * Probably better test for (domain->domain.type & __IOMMU_DOMAIN_DMA_API)

No, that wouldn't be better.

This seems sketchy, shouldn't the iommu driver be confirming that the
PSP has enabled the vDTE before making these assumptions?

> diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
> index aefde4443671..23ae95fc95ee 100644
> --- a/drivers/iommu/iommufd/hw_pagetable.c
> +++ b/drivers/iommu/iommufd/hw_pagetable.c
> @@ -136,6 +136,10 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
>  	hwpt_paging->nest_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT;
>  
>  	if (ops->domain_alloc_user) {
> +		if (ictx->kvm) {
> +			pr_info("Trusted domain");
> +			flags |= IOMMU_HWPT_TRUSTED;
> +		}

Huh?

Jason
diff mbox series

Patch

diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 2b76b5dedc1d..cf435c1f2839 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -588,6 +588,8 @@  struct protection_domain {
 
 	struct mmu_notifier mn;	/* mmu notifier for the SVA domain */
 	struct list_head dev_data_list; /* List of pdom_dev_data */
+
+	u32 flags;
 };
 
 /*
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 4dde745cfb7e..c5536686b0b1 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -364,6 +364,7 @@  enum iommufd_hwpt_alloc_flags {
 	IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0,
 	IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1,
 	IOMMU_HWPT_FAULT_ID_VALID = 1 << 2,
+	IOMMU_HWPT_TRUSTED = 1 << 3,
 };
 
 /**
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index b19e8c0f48fa..e2f8fb79ee53 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1930,7 +1930,20 @@  static void set_dte_entry(struct amd_iommu *iommu,
 	}
 
 	flags &= ~DEV_DOMID_MASK;
-	flags |= domid;
+
+	if (dev_data->dev->tdi_enabled && (domain->flags & IOMMU_HWPT_TRUSTED)) {
+		/*
+		 * Do hack for VFIO with TSM enabled.
+		 * This runs when VFIO is being bound to a device and before TDI is bound.
+		 * Ideally TSM should change DTE only when TDI is bound.
+		 * Probably better test for (domain->domain.type & __IOMMU_DOMAIN_DMA_API)
+		 */
+		dev_info(dev_data->dev, "Skip DomainID=%x and set bit96\n", domid);
+		flags |= 1ULL << (96 - 64);
+	} else {
+		//dev_info(dev_data->dev, "Not skip DomainID=%x and not set bit96\n", domid);
+		flags |= domid;
+	}
 
 	old_domid = dev_table[devid].data[1] & DEV_DOMID_MASK;
 	dev_table[devid].data[1]  = flags;
@@ -2413,6 +2426,8 @@  static struct iommu_domain *do_iommu_domain_alloc(unsigned int type,
 
 		if (dirty_tracking)
 			domain->domain.dirty_ops = &amd_dirty_ops;
+
+		domain->flags = flags;
 	}
 
 	return &domain->domain;
@@ -2437,7 +2452,8 @@  amd_iommu_domain_alloc_user(struct device *dev, u32 flags,
 {
 	unsigned int type = IOMMU_DOMAIN_UNMANAGED;
 
-	if ((flags & ~IOMMU_HWPT_ALLOC_DIRTY_TRACKING) || parent || user_data)
+	if ((flags & ~(IOMMU_HWPT_ALLOC_DIRTY_TRACKING | IOMMU_HWPT_TRUSTED)) ||
+	    parent || user_data)
 		return ERR_PTR(-EOPNOTSUPP);
 
 	return do_iommu_domain_alloc(type, dev, flags);
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
index aefde4443671..23ae95fc95ee 100644
--- a/drivers/iommu/iommufd/hw_pagetable.c
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -136,6 +136,10 @@  iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
 	hwpt_paging->nest_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT;
 
 	if (ops->domain_alloc_user) {
+		if (ictx->kvm) {
+			pr_info("Trusted domain");
+			flags |= IOMMU_HWPT_TRUSTED;
+		}
 		hwpt->domain = ops->domain_alloc_user(idev->dev, flags, NULL,
 						      user_data);
 		if (IS_ERR(hwpt->domain)) {