diff mbox series

[RFC,RESEND,3/6] iommu/riscv: support GSCID

Message ID 20240507142600.23844-4-zong.li@sifive.com (mailing list archive)
State Superseded
Headers show
Series RISC-V IOMMU HPM and nested IOMMU support | expand

Checks

Context Check Description
conchuod/vmtest-fixes-PR fail merge-conflict

Commit Message

Zong Li May 7, 2024, 2:25 p.m. UTC
This patch adds a global ID Allocator for GSCID and a wrap for setting
up GSCID in IOTLB invalidation command.

Set up iohgatp to enable the second-stage table and flush the stage-2
table if a GSCID is allocated.

The GSCID of a domain should be freed when the domain is released. A GSCID
will be allocated for the parent domain in the nested IOMMU process.

Signed-off-by: Zong Li <zong.li@sifive.com>
---
 drivers/iommu/riscv/iommu-bits.h |  7 +++
 drivers/iommu/riscv/iommu.c      | 81 ++++++++++++++++++++++----------
 2 files changed, 62 insertions(+), 26 deletions(-)

Comments

Jason Gunthorpe May 7, 2024, 3:15 p.m. UTC | #1
On Tue, May 07, 2024 at 10:25:57PM +0800, Zong Li wrote:
> @@ -919,29 +924,43 @@ static void riscv_iommu_iotlb_inval(struct riscv_iommu_domain *domain,
>  	rcu_read_lock();
>  
>  	prev = NULL;
> -	list_for_each_entry_rcu(bond, &domain->bonds, list) {
> -		iommu = dev_to_iommu(bond->dev);
>  
> -		/*
> -		 * IOTLB invalidation request can be safely omitted if already sent
> -		 * to the IOMMU for the same PSCID, and with domain->bonds list
> -		 * arranged based on the device's IOMMU, it's sufficient to check
> -		 * last device the invalidation was sent to.
> -		 */
> -		if (iommu == prev)
> -			continue;
> -
> -		riscv_iommu_cmd_inval_vma(&cmd);
> -		riscv_iommu_cmd_inval_set_pscid(&cmd, domain->pscid);
> -		if (len && len >= RISCV_IOMMU_IOTLB_INVAL_LIMIT) {
> -			for (iova = start; iova < end; iova += PAGE_SIZE) {
> -				riscv_iommu_cmd_inval_set_addr(&cmd, iova);
> +	/*
> +	 * Host domain needs to flush entries in stage-2 for MSI mapping.
> +	 * However, device is bound to s1 domain instead of s2 domain.
> +	 * We need to flush mapping without looping devices of s2 domain
> +	 */
> +	if (domain->gscid) {
> +		riscv_iommu_cmd_inval_gvma(&cmd);
> +		riscv_iommu_cmd_inval_set_gscid(&cmd, domain->gscid);
> +		riscv_iommu_cmd_send(iommu, &cmd, 0);
> +		riscv_iommu_cmd_iofence(&cmd);
> +		riscv_iommu_cmd_send(iommu, &cmd, RISCV_IOMMU_QUEUE_TIMEOUT);

Is iommu null here? Where did it come from?

This looks wrong too. The "bonds" list is sort of misnamed, it is
really a list of invalidation instructions. If you need a special
invalidation instruction for this case then you should allocate a
memory and add it to the bond list when the attach is done.

Invalidation should simply iterate over the bond list and do the
instructions it contains, always.

>  static void riscv_iommu_iodir_update(struct riscv_iommu_device *iommu,
> -				     struct device *dev, u64 fsc, u64 ta)
> +				     struct device *dev, u64 fsc, u64 ta, u64 iohgatp)

I think you should make a struct to represent the dc entry.

Jason
Zong Li May 7, 2024, 3:52 p.m. UTC | #2
On Tue, May 7, 2024 at 11:15 PM Jason Gunthorpe <jgg@ziepe.ca> wrote:
>
> On Tue, May 07, 2024 at 10:25:57PM +0800, Zong Li wrote:
> > @@ -919,29 +924,43 @@ static void riscv_iommu_iotlb_inval(struct riscv_iommu_domain *domain,
> >       rcu_read_lock();
> >
> >       prev = NULL;
> > -     list_for_each_entry_rcu(bond, &domain->bonds, list) {
> > -             iommu = dev_to_iommu(bond->dev);
> >
> > -             /*
> > -              * IOTLB invalidation request can be safely omitted if already sent
> > -              * to the IOMMU for the same PSCID, and with domain->bonds list
> > -              * arranged based on the device's IOMMU, it's sufficient to check
> > -              * last device the invalidation was sent to.
> > -              */
> > -             if (iommu == prev)
> > -                     continue;
> > -
> > -             riscv_iommu_cmd_inval_vma(&cmd);
> > -             riscv_iommu_cmd_inval_set_pscid(&cmd, domain->pscid);
> > -             if (len && len >= RISCV_IOMMU_IOTLB_INVAL_LIMIT) {
> > -                     for (iova = start; iova < end; iova += PAGE_SIZE) {
> > -                             riscv_iommu_cmd_inval_set_addr(&cmd, iova);
> > +     /*
> > +      * Host domain needs to flush entries in stage-2 for MSI mapping.
> > +      * However, device is bound to s1 domain instead of s2 domain.
> > +      * We need to flush mapping without looping devices of s2 domain
> > +      */
> > +     if (domain->gscid) {
> > +             riscv_iommu_cmd_inval_gvma(&cmd);
> > +             riscv_iommu_cmd_inval_set_gscid(&cmd, domain->gscid);
> > +             riscv_iommu_cmd_send(iommu, &cmd, 0);
> > +             riscv_iommu_cmd_iofence(&cmd);
> > +             riscv_iommu_cmd_send(iommu, &cmd, RISCV_IOMMU_QUEUE_TIMEOUT);
>
> Is iommu null here? Where did it come from?
>
> This looks wrong too. The "bonds" list is sort of misnamed, it is
> really a list of invalidation instructions. If you need a special
> invalidation instruction for this case then you should allocate a
> memory and add it to the bond list when the attach is done.
>
> Invalidation should simply iterate over the bond list and do the
> instructions it contains, always.

I messed up this piece of code while cleaning it. I will fix it in the
next version. However, after your tips, it seems to me that we should
allocate a new bond entry in the s2 domain's list. This is because the
original bond entry becomes detached from the s2 domain and is
attached to the s1 domain when the device passes through to the guest,
if we don't create a new bond in s2 domain, then the list in s2 domain
would lose it. Let me modify the implementation here. Thanks.

>
> >  static void riscv_iommu_iodir_update(struct riscv_iommu_device *iommu,
> > -                                  struct device *dev, u64 fsc, u64 ta)
> > +                                  struct device *dev, u64 fsc, u64 ta, u64 iohgatp)
>
> I think you should make a struct to represent the dc entry.
>
> Jason
Jason Gunthorpe May 7, 2024, 4:25 p.m. UTC | #3
On Tue, May 07, 2024 at 11:52:15PM +0800, Zong Li wrote:
> On Tue, May 7, 2024 at 11:15 PM Jason Gunthorpe <jgg@ziepe.ca> wrote:
> >
> > On Tue, May 07, 2024 at 10:25:57PM +0800, Zong Li wrote:
> > > @@ -919,29 +924,43 @@ static void riscv_iommu_iotlb_inval(struct riscv_iommu_domain *domain,
> > >       rcu_read_lock();
> > >
> > >       prev = NULL;
> > > -     list_for_each_entry_rcu(bond, &domain->bonds, list) {
> > > -             iommu = dev_to_iommu(bond->dev);
> > >
> > > -             /*
> > > -              * IOTLB invalidation request can be safely omitted if already sent
> > > -              * to the IOMMU for the same PSCID, and with domain->bonds list
> > > -              * arranged based on the device's IOMMU, it's sufficient to check
> > > -              * last device the invalidation was sent to.
> > > -              */
> > > -             if (iommu == prev)
> > > -                     continue;
> > > -
> > > -             riscv_iommu_cmd_inval_vma(&cmd);
> > > -             riscv_iommu_cmd_inval_set_pscid(&cmd, domain->pscid);
> > > -             if (len && len >= RISCV_IOMMU_IOTLB_INVAL_LIMIT) {
> > > -                     for (iova = start; iova < end; iova += PAGE_SIZE) {
> > > -                             riscv_iommu_cmd_inval_set_addr(&cmd, iova);
> > > +     /*
> > > +      * Host domain needs to flush entries in stage-2 for MSI mapping.
> > > +      * However, device is bound to s1 domain instead of s2 domain.
> > > +      * We need to flush mapping without looping devices of s2 domain
> > > +      */
> > > +     if (domain->gscid) {
> > > +             riscv_iommu_cmd_inval_gvma(&cmd);
> > > +             riscv_iommu_cmd_inval_set_gscid(&cmd, domain->gscid);
> > > +             riscv_iommu_cmd_send(iommu, &cmd, 0);
> > > +             riscv_iommu_cmd_iofence(&cmd);
> > > +             riscv_iommu_cmd_send(iommu, &cmd, RISCV_IOMMU_QUEUE_TIMEOUT);
> >
> > Is iommu null here? Where did it come from?
> >
> > This looks wrong too. The "bonds" list is sort of misnamed, it is
> > really a list of invalidation instructions. If you need a special
> > invalidation instruction for this case then you should allocate a
> > memory and add it to the bond list when the attach is done.
> >
> > Invalidation should simply iterate over the bond list and do the
> > instructions it contains, always.
> 
> I messed up this piece of code while cleaning it. I will fix it in the
> next version. However, after your tips, it seems to me that we should
> allocate a new bond entry in the s2 domain's list. 

Yes, when the nest is attached the S2's bond should get a GSCID
invalidation instruction and the S1's bond should get no invalidation
instruction. Bond is better understood as "paging domain invalidation
instructions".

(also if you follow this advice, then again, I don't see why the idr
allocators are global)

You have to make a decision on the user invalidation flow, and some of
that depends on what you plan to do for ATS invalidation.

It is OK to make the nested domain locked to a single iommu, enforced
at attach time, but don't use the bond list to do it.

For single iommu you'd just de-virtualize the PSCID and the ATS vRID
and stuff the commands into the single iommu. But this shouldn't
interact with the bond. The bond list is about invalidation
instructions for the paging domain. Just loosely store the owning
instance in the nesting domain struct.

Also please be careful that you don't advertise ATS support to the
guest before the kernel driver understands how to support it. You
should probably put a note to that effect in the uapi struct for the
get info patch.

Jason
diff mbox series

Patch

diff --git a/drivers/iommu/riscv/iommu-bits.h b/drivers/iommu/riscv/iommu-bits.h
index 11351cf6c710..62b1ee387357 100644
--- a/drivers/iommu/riscv/iommu-bits.h
+++ b/drivers/iommu/riscv/iommu-bits.h
@@ -728,6 +728,13 @@  static inline void riscv_iommu_cmd_inval_vma(struct riscv_iommu_command *cmd)
 	cmd->dword1 = 0;
 }
 
+static inline void riscv_iommu_cmd_inval_gvma(struct riscv_iommu_command *cmd)
+{
+	cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IOTINVAL_OPCODE) |
+		      FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA);
+	cmd->dword1 = 0;
+}
+
 static inline void riscv_iommu_cmd_inval_set_addr(struct riscv_iommu_command *cmd,
 						  u64 addr)
 {
diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
index e0bf74a9c64d..d38e09b138b6 100644
--- a/drivers/iommu/riscv/iommu.c
+++ b/drivers/iommu/riscv/iommu.c
@@ -45,6 +45,10 @@ 
 static DEFINE_IDA(riscv_iommu_pscids);
 #define RISCV_IOMMU_MAX_PSCID		(BIT(20) - 1)
 
+/* IOMMU GSCID allocation namespace. */
+static DEFINE_IDA(riscv_iommu_gscids);
+#define RISCV_IOMMU_MAX_GSCID		(BIT(16) - 1)
+
 /* Device resource-managed allocations */
 struct riscv_iommu_devres {
 	void *addr;
@@ -826,6 +830,7 @@  struct riscv_iommu_domain {
 	struct list_head bonds;
 	spinlock_t lock;		/* protect bonds list updates. */
 	int pscid;
+	int gscid;
 	int numa_node;
 	int amo_enabled:1;
 	unsigned int pgd_mode;
@@ -919,29 +924,43 @@  static void riscv_iommu_iotlb_inval(struct riscv_iommu_domain *domain,
 	rcu_read_lock();
 
 	prev = NULL;
-	list_for_each_entry_rcu(bond, &domain->bonds, list) {
-		iommu = dev_to_iommu(bond->dev);
 
-		/*
-		 * IOTLB invalidation request can be safely omitted if already sent
-		 * to the IOMMU for the same PSCID, and with domain->bonds list
-		 * arranged based on the device's IOMMU, it's sufficient to check
-		 * last device the invalidation was sent to.
-		 */
-		if (iommu == prev)
-			continue;
-
-		riscv_iommu_cmd_inval_vma(&cmd);
-		riscv_iommu_cmd_inval_set_pscid(&cmd, domain->pscid);
-		if (len && len >= RISCV_IOMMU_IOTLB_INVAL_LIMIT) {
-			for (iova = start; iova < end; iova += PAGE_SIZE) {
-				riscv_iommu_cmd_inval_set_addr(&cmd, iova);
+	/*
+	 * Host domain needs to flush entries in stage-2 for MSI mapping.
+	 * However, device is bound to s1 domain instead of s2 domain.
+	 * We need to flush mapping without looping devices of s2 domain
+	 */
+	if (domain->gscid) {
+		riscv_iommu_cmd_inval_gvma(&cmd);
+		riscv_iommu_cmd_inval_set_gscid(&cmd, domain->gscid);
+		riscv_iommu_cmd_send(iommu, &cmd, 0);
+		riscv_iommu_cmd_iofence(&cmd);
+		riscv_iommu_cmd_send(iommu, &cmd, RISCV_IOMMU_QUEUE_TIMEOUT);
+	} else {
+		list_for_each_entry_rcu(bond, &domain->bonds, list) {
+			iommu = dev_to_iommu(bond->dev);
+
+			/*
+			 * IOTLB invalidation request can be safely omitted if already sent
+			 * to the IOMMU for the same PSCID, and with domain->bonds list
+			 * arranged based on the device's IOMMU, it's sufficient to check
+			 * last device the invalidation was sent to.
+			 */
+			if (iommu == prev)
+				continue;
+
+			riscv_iommu_cmd_inval_vma(&cmd);
+			riscv_iommu_cmd_inval_set_pscid(&cmd, domain->pscid);
+			if (len && len >= RISCV_IOMMU_IOTLB_INVAL_LIMIT) {
+				for (iova = start; iova < end; iova += PAGE_SIZE) {
+					riscv_iommu_cmd_inval_set_addr(&cmd, iova);
+					riscv_iommu_cmd_send(iommu, &cmd, 0);
+				}
+			} else {
 				riscv_iommu_cmd_send(iommu, &cmd, 0);
 			}
-		} else {
-			riscv_iommu_cmd_send(iommu, &cmd, 0);
+			prev = iommu;
 		}
-		prev = iommu;
 	}
 
 	prev = NULL;
@@ -972,7 +991,7 @@  static void riscv_iommu_iotlb_inval(struct riscv_iommu_domain *domain,
  * interim translation faults.
  */
 static void riscv_iommu_iodir_update(struct riscv_iommu_device *iommu,
-				     struct device *dev, u64 fsc, u64 ta)
+				     struct device *dev, u64 fsc, u64 ta, u64 iohgatp)
 {
 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
 	struct riscv_iommu_dc *dc;
@@ -1012,6 +1031,7 @@  static void riscv_iommu_iodir_update(struct riscv_iommu_device *iommu,
 		/* Update device context, write TC.V as the last step. */
 		WRITE_ONCE(dc->fsc, fsc);
 		WRITE_ONCE(dc->ta, ta & RISCV_IOMMU_PC_TA_PSCID);
+		WRITE_ONCE(dc->iohgatp, iohgatp);
 		WRITE_ONCE(dc->tc, tc);
 	}
 }
@@ -1271,6 +1291,9 @@  static void riscv_iommu_free_paging_domain(struct iommu_domain *iommu_domain)
 	if ((int)domain->pscid > 0)
 		ida_free(&riscv_iommu_pscids, domain->pscid);
 
+	if ((int)domain->gscid > 0)
+		ida_free(&riscv_iommu_gscids, domain->gscid);
+
 	riscv_iommu_pte_free(domain, _io_pte_entry(pfn, _PAGE_TABLE), NULL);
 	kfree(domain);
 }
@@ -1296,7 +1319,7 @@  static int riscv_iommu_attach_paging_domain(struct iommu_domain *iommu_domain,
 	struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
 	struct riscv_iommu_device *iommu = dev_to_iommu(dev);
 	struct riscv_iommu_info *info = dev_iommu_priv_get(dev);
-	u64 fsc, ta;
+	u64 fsc = 0, iohgatp = 0, ta;
 
 	if (!riscv_iommu_pt_supported(iommu, domain->pgd_mode))
 		return -ENODEV;
@@ -1314,12 +1337,18 @@  static int riscv_iommu_attach_paging_domain(struct iommu_domain *iommu_domain,
 	 */
 	riscv_iommu_iotlb_inval(domain, 0, ULONG_MAX);
 
-	fsc = FIELD_PREP(RISCV_IOMMU_PC_FSC_MODE, domain->pgd_mode) |
-	      FIELD_PREP(RISCV_IOMMU_PC_FSC_PPN, virt_to_pfn(domain->pgd_root));
+	if (domain->gscid)
+		iohgatp = FIELD_PREP(RISCV_IOMMU_DC_IOHGATP_MODE, domain->pgd_mode) |
+			  FIELD_PREP(RISCV_IOMMU_DC_IOHGATP_GSCID, domain->gscid) |
+			  FIELD_PREP(RISCV_IOMMU_DC_IOHGATP_PPN, virt_to_pfn(domain->pgd_root));
+	else
+		fsc = FIELD_PREP(RISCV_IOMMU_PC_FSC_MODE, domain->pgd_mode) |
+		      FIELD_PREP(RISCV_IOMMU_PC_FSC_PPN, virt_to_pfn(domain->pgd_root));
+
 	ta = FIELD_PREP(RISCV_IOMMU_PC_TA_PSCID, domain->pscid) |
 	     RISCV_IOMMU_PC_TA_V;
 
-	riscv_iommu_iodir_update(iommu, dev, fsc, ta);
+	riscv_iommu_iodir_update(iommu, dev, fsc, ta, iohgatp);
 	riscv_iommu_bond_unlink(info->domain, dev);
 	info->domain = domain;
 
@@ -1422,7 +1451,7 @@  static int riscv_iommu_attach_blocking_domain(struct iommu_domain *iommu_domain,
 	struct riscv_iommu_device *iommu = dev_to_iommu(dev);
 	struct riscv_iommu_info *info = dev_iommu_priv_get(dev);
 
-	riscv_iommu_iodir_update(iommu, dev, RISCV_IOMMU_FSC_BARE, 0);
+	riscv_iommu_iodir_update(iommu, dev, RISCV_IOMMU_FSC_BARE, 0, 0);
 	riscv_iommu_bond_unlink(info->domain, dev);
 	info->domain = NULL;
 
@@ -1442,7 +1471,7 @@  static int riscv_iommu_attach_identity_domain(struct iommu_domain *iommu_domain,
 	struct riscv_iommu_device *iommu = dev_to_iommu(dev);
 	struct riscv_iommu_info *info = dev_iommu_priv_get(dev);
 
-	riscv_iommu_iodir_update(iommu, dev, RISCV_IOMMU_FSC_BARE, RISCV_IOMMU_PC_TA_V);
+	riscv_iommu_iodir_update(iommu, dev, RISCV_IOMMU_FSC_BARE, RISCV_IOMMU_PC_TA_V, 0);
 	riscv_iommu_bond_unlink(info->domain, dev);
 	info->domain = NULL;