@@ -125,6 +125,33 @@ struct vfio_regions {
#define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu) \
(!list_empty(&iommu->domain_list))
+struct domain_capsule {
+	struct iommu_domain *domain;
+	void *data;
+};
+
+/*
+ * Invoke @fn on each device in each group of each domain in the
+ * container, stopping at the first error. The caller must hold
+ * iommu->lock.
+ */
+static int vfio_iommu_for_each_dev(struct vfio_iommu *iommu,
+				   int (*fn)(struct device *dev, void *data),
+				   void *data)
+{
+	struct domain_capsule dc = { .data = data };
+	struct vfio_domain *d;
+	struct vfio_group *g;
+	int ret;
+
+	list_for_each_entry(d, &iommu->domain_list, next) {
+		dc.domain = d->domain;
+		list_for_each_entry(g, &d->group_list, next) {
+			ret = iommu_group_for_each_dev(g->iommu_group,
+						       &dc, fn);
+			if (ret)
+				return ret;
+		}
+	}
+	return 0;
+}
+
static int put_pfn(unsigned long pfn, int prot);
/*
@@ -2339,6 +2366,88 @@ static int vfio_iommu_type1_set_pasid_quota(struct vfio_iommu *iommu,
return ret;
}
+static int vfio_bind_gpasid_fn(struct device *dev, void *data)
+{
+	struct domain_capsule *dc = (struct domain_capsule *)data;
+	struct iommu_gpasid_bind_data *gbind_data =
+		(struct iommu_gpasid_bind_data *)dc->data;
+
+	return iommu_sva_bind_gpasid(dc->domain, dev, gbind_data);
+}
+
+static int vfio_unbind_gpasid_fn(struct device *dev, void *data)
+{
+	struct domain_capsule *dc = (struct domain_capsule *)data;
+	struct iommu_gpasid_bind_data *gbind_data =
+		(struct iommu_gpasid_bind_data *)dc->data;
+
+	return iommu_sva_unbind_gpasid(dc->domain, dev,
+				       gbind_data->hpasid);
+}
+
+/*
+ * Unbind a specific gpasid. The caller of this function is required
+ * to hold vfio_iommu->lock.
+ */
+static long vfio_iommu_type1_do_guest_unbind(struct vfio_iommu *iommu,
+					     void *gbind_data)
+{
+	return vfio_iommu_for_each_dev(iommu,
+				       vfio_unbind_gpasid_fn, gbind_data);
+}
+
+static long vfio_iommu_type1_bind_gpasid(struct vfio_iommu *iommu,
+					 void *gbind_data)
+{
+	int ret = 0;
+
+	mutex_lock(&iommu->lock);
+	if (!IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	ret = vfio_iommu_for_each_dev(iommu,
+				      vfio_bind_gpasid_fn, gbind_data);
+	/*
+	 * A bind failure may not be a total failure: some devices
+	 * within the iommu group may have been bound successfully.
+	 * Although we don't enable the pasid capability for
+	 * non-singleton iommu groups, an unbind operation here ensures
+	 * that no partial binding is left behind for an iommu group.
+	 */
+	if (ret)
+		/*
+		 * Undo all binds that already succeeded. There is no
+		 * need to check the return value here, since some
+		 * devices within the group were never bound
+		 * successfully in the first place.
+		 */
+		vfio_iommu_type1_do_guest_unbind(iommu, gbind_data);
+
+out_unlock:
+	mutex_unlock(&iommu->lock);
+	return ret;
+}
+
+static long vfio_iommu_type1_unbind_gpasid(struct vfio_iommu *iommu,
+					   void *gbind_data)
+{
+	int ret = 0;
+
+	mutex_lock(&iommu->lock);
+	if (!IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	ret = vfio_iommu_type1_do_guest_unbind(iommu, gbind_data);
+
+out_unlock:
+	mutex_unlock(&iommu->lock);
+	return ret;
+}
+
static long vfio_iommu_type1_ioctl(void *iommu_data,
unsigned int cmd, unsigned long arg)
{
@@ -2501,6 +2610,49 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
if (quota.argsz < minsz)
return -EINVAL;
return vfio_iommu_type1_set_pasid_quota(iommu, quota.quota);
+
+	} else if (cmd == VFIO_IOMMU_BIND) {
+		struct vfio_iommu_type1_bind bind;
+		u32 version;
+		int data_size;
+		void *gbind_data;
+		long ret;
+
+		minsz = offsetofend(struct vfio_iommu_type1_bind, flags);
+
+		if (copy_from_user(&bind, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		/* The version field of the bind data follows @flags */
+		if (bind.argsz < minsz + sizeof(version))
+			return -EINVAL;
+
+		/* Get the version of struct iommu_gpasid_bind_data */
+		if (copy_from_user(&version,
+				   (void __user *)(arg + minsz),
+				   sizeof(version)))
+			return -EFAULT;
+
+		data_size = iommu_uapi_get_data_size(
+				IOMMU_UAPI_BIND_GPASID, version);
+		/* Reject unsupported versions of the bind data */
+		if (data_size <= 0)
+			return -EINVAL;
+
+		/* User buffer must cover the whole bind data */
+		if (bind.argsz < minsz + data_size)
+			return -EINVAL;
+
+		gbind_data = kzalloc(data_size, GFP_KERNEL);
+		if (!gbind_data)
+			return -ENOMEM;
+
+		if (copy_from_user(gbind_data,
+				   (void __user *)(arg + minsz), data_size)) {
+			kfree(gbind_data);
+			return -EFAULT;
+		}
+
+		switch (bind.flags & VFIO_IOMMU_BIND_MASK) {
+		case VFIO_IOMMU_BIND_GUEST_PGTBL:
+			ret = vfio_iommu_type1_bind_gpasid(iommu,
+							   gbind_data);
+			break;
+		case VFIO_IOMMU_UNBIND_GUEST_PGTBL:
+			ret = vfio_iommu_type1_unbind_gpasid(iommu,
+							     gbind_data);
+			break;
+		default:
+			ret = -EINVAL;
+			break;
+		}
+
+		/* The bind data was consumed above, free it here */
+		kfree(gbind_data);
+		return ret;
}
return -ENOTTY;
@@ -14,6 +14,7 @@
#include <linux/types.h>
#include <linux/ioctl.h>
+#include <linux/iommu.h>
#define VFIO_API_VERSION 0
@@ -874,6 +875,51 @@ struct vfio_iommu_type1_pasid_quota {
*/
#define VFIO_NESTING_GET_IOMMU_UAPI_VERSION _IO(VFIO_TYPE, VFIO_BASE + 24)
+/**
+ * Supported flags:
+ * - VFIO_IOMMU_BIND_GUEST_PGTBL: bind guest page tables to the host
+ *   for nesting-type IOMMUs. The @data field takes a
+ *   struct iommu_gpasid_bind_data.
+ * - VFIO_IOMMU_UNBIND_GUEST_PGTBL: undo a bind-guest-page-table
+ *   operation invoked by VFIO_IOMMU_BIND_GUEST_PGTBL.
+ */
+struct vfio_iommu_type1_bind {
+	__u32		argsz;
+	__u32		flags;
+#define VFIO_IOMMU_BIND_GUEST_PGTBL	(1 << 0)
+#define VFIO_IOMMU_UNBIND_GUEST_PGTBL	(1 << 1)
+	__u8		data[];
+};
+
+#define VFIO_IOMMU_BIND_MASK	(VFIO_IOMMU_BIND_GUEST_PGTBL | \
+				 VFIO_IOMMU_UNBIND_GUEST_PGTBL)
+
+/**
+ * VFIO_IOMMU_BIND - _IOW(VFIO_TYPE, VFIO_BASE + 25,
+ *				struct vfio_iommu_type1_bind)
+ *
+ * Manage the address spaces of devices in this container. Initially a
+ * TYPE1 container can only have one address space, managed with
+ * VFIO_IOMMU_MAP/UNMAP_DMA.
+ *
+ * An IOMMU of type VFIO_TYPE1_NESTING_IOMMU can be managed by both
+ * MAP/UNMAP and BIND ioctls at the same time. MAP/UNMAP acts on the
+ * stage-2 (host) page tables, and BIND manages the stage-1 (guest)
+ * page tables. Other types of IOMMU may allow MAP/UNMAP and BIND to
+ * coexist as well, with MAP/UNMAP controlling the traffic that only
+ * requires single-stage translation and BIND controlling the traffic
+ * that requires nested translation; but this depends on the underlying
+ * IOMMU architecture and isn't guaranteed. An example is guest SVA
+ * traffic, which needs nested translation to perform the gVA->gPA and
+ * then the gPA->hPA translation.
+ *
+ * Availability of this feature depends on the device, its bus, the
+ * underlying IOMMU and the CPU architecture.
+ *
+ * returns: 0 on success, -errno on failure.
+ */
+#define VFIO_IOMMU_BIND		_IO(VFIO_TYPE, VFIO_BASE + 25)
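+
+/*
+ * Illustrative userspace sketch (documentation only, not part of the
+ * uAPI contract; "container_fd" and "gbind" are hypothetical names):
+ * given an open VFIO container configured for nesting and a filled-in
+ * struct iommu_gpasid_bind_data, a guest page table bind would look
+ * roughly like:
+ *
+ *	size_t argsz = sizeof(struct vfio_iommu_type1_bind) +
+ *		       sizeof(gbind);
+ *	struct vfio_iommu_type1_bind *bind = calloc(1, argsz);
+ *
+ *	bind->argsz = argsz;
+ *	bind->flags = VFIO_IOMMU_BIND_GUEST_PGTBL;
+ *	memcpy(bind->data, &gbind, sizeof(gbind));
+ *	if (ioctl(container_fd, VFIO_IOMMU_BIND, bind))
+ *		perror("VFIO_IOMMU_BIND");
+ *	free(bind);
+ */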
+
/* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
/*