diff mbox series

[RFC,RESEND,6/6] iommu/riscv: support nested iommu for flushing cache

Message ID 20240507142600.23844-7-zong.li@sifive.com (mailing list archive)
State Superseded
Headers show
Series RISC-V IOMMU HPM and nested IOMMU support | expand

Checks

Context Check Description
conchuod/vmtest-fixes-PR fail merge-conflict

Commit Message

Zong Li May 7, 2024, 2:26 p.m. UTC
This patch implements cache_invalidate_user operation for the userspace
to flush the hardware caches for a nested domain through iommufd.

Signed-off-by: Zong Li <zong.li@sifive.com>
---
 drivers/iommu/riscv/iommu.c  | 91 ++++++++++++++++++++++++++++++++++++
 include/uapi/linux/iommufd.h |  9 ++++
 2 files changed, 100 insertions(+)

Comments

Jason Gunthorpe May 7, 2024, 3:08 p.m. UTC | #1
On Tue, May 07, 2024 at 10:26:00PM +0800, Zong Li wrote:
> This patch implements cache_invalidate_user operation for the userspace
> to flush the hardware caches for a nested domain through iommufd.
> 
> Signed-off-by: Zong Li <zong.li@sifive.com>
> ---
>  drivers/iommu/riscv/iommu.c  | 91 ++++++++++++++++++++++++++++++++++++
>  include/uapi/linux/iommufd.h |  9 ++++
>  2 files changed, 100 insertions(+)
> 
> diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
> index 7eda850df475..4dd58fe2242d 100644
> --- a/drivers/iommu/riscv/iommu.c
> +++ b/drivers/iommu/riscv/iommu.c
> @@ -1522,9 +1522,100 @@ static void riscv_iommu_domain_free_nested(struct iommu_domain *domain)
>  	kfree(riscv_domain);
>  }
>  
> +static int riscv_iommu_fix_user_cmd(struct riscv_iommu_command *cmd,
> +				    unsigned int pscid, unsigned int gscid)
> +{
> +	u32 opcode = FIELD_GET(RISCV_IOMMU_CMD_OPCODE, cmd->dword0);
> +
> +	switch (opcode) {
> +	case RISCV_IOMMU_CMD_IOTINVAL_OPCODE:
> +		u32 func = FIELD_GET(RISCV_IOMMU_CMD_FUNC, cmd->dword0);
> +
> +		if (func != RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA &&
> +		    func != RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA) {
> +			pr_warn("The IOTINVAL function: 0x%x is not supported\n",
> +				func);
> +			return -EOPNOTSUPP;
> +		}
> +
> +		if (func == RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA) {
> +			cmd->dword0 &= ~RISCV_IOMMU_CMD_FUNC;
> +			cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD_FUNC,
> +						  RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA);
> +		}
> +
> +		cmd->dword0 &= ~(RISCV_IOMMU_CMD_IOTINVAL_PSCID |
> +				 RISCV_IOMMU_CMD_IOTINVAL_GSCID);
> +		riscv_iommu_cmd_inval_set_pscid(cmd, pscid);
> +		riscv_iommu_cmd_inval_set_gscid(cmd, gscid);
> +		break;
> +	case RISCV_IOMMU_CMD_IODIR_OPCODE:
> +		/*
> +		 * Ensure the device ID is right. We expect that VMM has
> +		 * transferred the device ID to host's from guest's.
> +		 */
> +		break;
> +	default:
> +		pr_warn("The user command: 0x%x is not supported\n", opcode);
> +		return -EOPNOTSUPP;

No userspace triggerable warnings.

> +static int riscv_iommu_cache_invalidate_user(struct iommu_domain *domain,
> +					     struct iommu_user_data_array *array)
> +{
> +	struct riscv_iommu_domain *riscv_domain = iommu_domain_to_riscv(domain);
> +	struct riscv_iommu_device *iommu;
> +	struct riscv_iommu_bond *bond;
> +	struct riscv_iommu_command cmd;
> +	struct iommu_hwpt_riscv_iommu_invalidate inv_info;
> +	int ret, index;
> +
> +	if (!riscv_domain)
> +		return -EINVAL;
> +
> +	/* Assume attached devices in the domain go through the same IOMMU device */

No, you can't assume that.

Jason
Zong Li May 7, 2024, 3:35 p.m. UTC | #2
On Tue, May 7, 2024 at 11:08 PM Jason Gunthorpe <jgg@ziepe.ca> wrote:
>
> On Tue, May 07, 2024 at 10:26:00PM +0800, Zong Li wrote:
> > This patch implements cache_invalidate_user operation for the userspace
> > to flush the hardware caches for a nested domain through iommufd.
> >
> > Signed-off-by: Zong Li <zong.li@sifive.com>
> > ---
> >  drivers/iommu/riscv/iommu.c  | 91 ++++++++++++++++++++++++++++++++++++
> >  include/uapi/linux/iommufd.h |  9 ++++
> >  2 files changed, 100 insertions(+)
> >
> > diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
> > index 7eda850df475..4dd58fe2242d 100644
> > --- a/drivers/iommu/riscv/iommu.c
> > +++ b/drivers/iommu/riscv/iommu.c
> > @@ -1522,9 +1522,100 @@ static void riscv_iommu_domain_free_nested(struct iommu_domain *domain)
> >       kfree(riscv_domain);
> >  }
> >
> > +static int riscv_iommu_fix_user_cmd(struct riscv_iommu_command *cmd,
> > +                                 unsigned int pscid, unsigned int gscid)
> > +{
> > +     u32 opcode = FIELD_GET(RISCV_IOMMU_CMD_OPCODE, cmd->dword0);
> > +
> > +     switch (opcode) {
> > +     case RISCV_IOMMU_CMD_IOTINVAL_OPCODE:
> > +             u32 func = FIELD_GET(RISCV_IOMMU_CMD_FUNC, cmd->dword0);
> > +
> > +             if (func != RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA &&
> > +                 func != RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA) {
> > +                     pr_warn("The IOTINVAL function: 0x%x is not supported\n",
> > +                             func);
> > +                     return -EOPNOTSUPP;
> > +             }
> > +
> > +             if (func == RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA) {
> > +                     cmd->dword0 &= ~RISCV_IOMMU_CMD_FUNC;
> > +                     cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD_FUNC,
> > +                                               RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA);
> > +             }
> > +
> > +             cmd->dword0 &= ~(RISCV_IOMMU_CMD_IOTINVAL_PSCID |
> > +                              RISCV_IOMMU_CMD_IOTINVAL_GSCID);
> > +             riscv_iommu_cmd_inval_set_pscid(cmd, pscid);
> > +             riscv_iommu_cmd_inval_set_gscid(cmd, gscid);
> > +             break;
> > +     case RISCV_IOMMU_CMD_IODIR_OPCODE:
> > +             /*
> > +              * Ensure the device ID is right. We expect that VMM has
> > +              * transferred the device ID to host's from guest's.
> > +              */
> > +             break;
> > +     default:
> > +             pr_warn("The user command: 0x%x is not supported\n", opcode);
> > +             return -EOPNOTSUPP;
>
> No userspace triggerable warnings.

I don't complete understand about this. Could I know whether we should
suppress the message and return the error directly, or if we should
convert the warning to an error (i.e. pr_err)?

>
> > +static int riscv_iommu_cache_invalidate_user(struct iommu_domain *domain,
> > +                                          struct iommu_user_data_array *array)
> > +{
> > +     struct riscv_iommu_domain *riscv_domain = iommu_domain_to_riscv(domain);
> > +     struct riscv_iommu_device *iommu;
> > +     struct riscv_iommu_bond *bond;
> > +     struct riscv_iommu_command cmd;
> > +     struct iommu_hwpt_riscv_iommu_invalidate inv_info;
> > +     int ret, index;
> > +
> > +     if (!riscv_domain)
> > +             return -EINVAL;
> > +
> > +     /* Assume attached devices in the domain go through the same IOMMU device */
>
> No, you can't assume that.

Do you think that it makes sense to add a riscv_iommu_device structure
in riscv_iommu_domain? Or we might need to add some data structure to
build the mapping of the riscv_iommu_device and riscv_iommu_domain,
then we can get the corresponding riscv_iommu_device by
riscv_iommu_domain?
Thanks

>
> Jason
Jason Gunthorpe May 7, 2024, 4:27 p.m. UTC | #3
On Tue, May 07, 2024 at 11:35:16PM +0800, Zong Li wrote:
> > > +     default:
> > > +             pr_warn("The user command: 0x%x is not supported\n", opcode);
> > > +             return -EOPNOTSUPP;
> >
> > No userspace triggerable warnings.
> 
> I don't complete understand about this. Could I know whether we should
> suppress the message and return the error directly, or if we should
> convert the warning to an error (i.e. pr_err)?

A userspace system call should never print to dmesg. Return an error
code. Put a pr_debug if you really care.

Jason
diff mbox series

Patch

diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
index 7eda850df475..4dd58fe2242d 100644
--- a/drivers/iommu/riscv/iommu.c
+++ b/drivers/iommu/riscv/iommu.c
@@ -1522,9 +1522,100 @@  static void riscv_iommu_domain_free_nested(struct iommu_domain *domain)
 	kfree(riscv_domain);
 }
 
+static int riscv_iommu_fix_user_cmd(struct riscv_iommu_command *cmd,
+				    unsigned int pscid, unsigned int gscid)
+{
+	u32 opcode = FIELD_GET(RISCV_IOMMU_CMD_OPCODE, cmd->dword0);
+
+	switch (opcode) {
+	case RISCV_IOMMU_CMD_IOTINVAL_OPCODE:
+		u32 func = FIELD_GET(RISCV_IOMMU_CMD_FUNC, cmd->dword0);
+
+		if (func != RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA &&
+		    func != RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA) {
+			pr_warn("The IOTINVAL function: 0x%x is not supported\n",
+				func);
+			return -EOPNOTSUPP;
+		}
+
+		if (func == RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA) {
+			cmd->dword0 &= ~RISCV_IOMMU_CMD_FUNC;
+			cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD_FUNC,
+						  RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA);
+		}
+
+		cmd->dword0 &= ~(RISCV_IOMMU_CMD_IOTINVAL_PSCID |
+				 RISCV_IOMMU_CMD_IOTINVAL_GSCID);
+		riscv_iommu_cmd_inval_set_pscid(cmd, pscid);
+		riscv_iommu_cmd_inval_set_gscid(cmd, gscid);
+		break;
+	case RISCV_IOMMU_CMD_IODIR_OPCODE:
+		/*
+		 * Ensure the device ID is right. We expect that VMM has
+		 * transferred the device ID to host's from guest's.
+		 */
+		break;
+	default:
+		pr_warn("The user command: 0x%x is not supported\n", opcode);
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int riscv_iommu_cache_invalidate_user(struct iommu_domain *domain,
+					     struct iommu_user_data_array *array)
+{
+	struct riscv_iommu_domain *riscv_domain = iommu_domain_to_riscv(domain);
+	struct riscv_iommu_device *iommu;
+	struct riscv_iommu_bond *bond;
+	struct riscv_iommu_command cmd;
+	struct iommu_hwpt_riscv_iommu_invalidate inv_info;
+	int ret, index;
+
+	if (!riscv_domain)
+		return -EINVAL;
+
+	/* Assume attached devices in the domain go through the same IOMMU device */
+	spin_lock(&riscv_domain->lock);
+	list_for_each_entry_rcu(bond, &riscv_domain->bonds, list) {
+		if (bond->dev) {
+			iommu = dev_to_iommu(bond->dev);
+			break;
+		}
+	}
+	spin_unlock(&riscv_domain->lock);
+
+	if (!iommu)
+		return -EINVAL;
+
+	for (index = 0; index < array->entry_num; index++) {
+		ret = iommu_copy_struct_from_user_array(&inv_info, array,
+							IOMMU_HWPT_DATA_RISCV_IOMMU,
+							index, cmd);
+		if (ret)
+			break;
+
+		ret = riscv_iommu_fix_user_cmd((struct riscv_iommu_command *)inv_info.cmd,
+					       riscv_domain->pscid,
+					       riscv_domain->s2->gscid);
+		if (ret == -EOPNOTSUPP)
+			continue;
+
+		riscv_iommu_cmd_send(iommu, (struct riscv_iommu_command *)inv_info.cmd, 0);
+		riscv_iommu_cmd_iofence(&cmd);
+		riscv_iommu_cmd_send(iommu, &cmd, RISCV_IOMMU_QUEUE_TIMEOUT);
+	}
+
+	array->entry_num = index;
+
+	return ret;
+}
+
 static const struct iommu_domain_ops riscv_iommu_nested_domain_ops = {
 	.attach_dev	= riscv_iommu_attach_dev_nested,
 	.free		= riscv_iommu_domain_free_nested,
+	.cache_invalidate_user = riscv_iommu_cache_invalidate_user,
 };
 
 static int
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index e10b6e236647..d93a8f11813d 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -689,6 +689,15 @@  struct iommu_hwpt_vtd_s1_invalidate {
 	__u32 __reserved;
 };
 
+/**
+ * struct iommu_hwpt_riscv_iommu_invalidate - RISCV IOMMU cache invalidation
+ *                                            (IOMMU_HWPT_TYPE_RISCV_IOMMU)
+ * @cmd: An array holds a command for cache invalidation
+ */
+struct iommu_hwpt_riscv_iommu_invalidate {
+	__aligned_u64 cmd[2];
+};
+
 /**
  * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE)
  * @size: sizeof(struct iommu_hwpt_invalidate)