Message ID | 1443624989-24346-1-git-send-email-Bharat.Bhushan@freescale.com (mailing list archive)
---|---
State | New, archived
On Wed, 2015-09-30 at 20:26 +0530, Bharat Bhushan wrote: > This Patch adds the VFIO APIs to add and remove reserved iova > regions. The reserved iova region can be used for mapping some > specific physical address in iommu. > > Currently we are planning to use this interface for adding iova > regions for creating iommu of msi-pages. But the API are designed > for future extension where some other physical address can be mapped. > > Signed-off-by: Bharat Bhushan <Bharat.Bhushan@freescale.com> > --- > drivers/vfio/vfio_iommu_type1.c | 201 +++++++++++++++++++++++++++++++++++++++- > include/uapi/linux/vfio.h | 43 +++++++++ > 2 files changed, 243 insertions(+), 1 deletion(-) > > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c > index 57d8c37..fa5d3e4 100644 > --- a/drivers/vfio/vfio_iommu_type1.c > +++ b/drivers/vfio/vfio_iommu_type1.c > @@ -59,6 +59,7 @@ struct vfio_iommu { > struct rb_root dma_list; > bool v2; > bool nesting; > + struct list_head reserved_iova_list; This alignment leads to poor packing in the structure, put it above the bools. > }; > > struct vfio_domain { > @@ -77,6 +78,15 @@ struct vfio_dma { > int prot; /* IOMMU_READ/WRITE */ > }; > > +struct vfio_resvd_region { > + dma_addr_t iova; > + size_t size; > + int prot; /* IOMMU_READ/WRITE */ > + int refcount; /* ref count of mappings */ > + uint64_t map_paddr; /* Mapped Physical Address */ phys_addr_t > + struct list_head next; > +}; > + > struct vfio_group { > struct iommu_group *iommu_group; > struct list_head next; > @@ -106,6 +116,38 @@ static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu, > return NULL; > } > > +/* This function must be called with iommu->lock held */ > +static bool vfio_overlap_with_resvd_region(struct vfio_iommu *iommu, > + dma_addr_t start, size_t size) > +{ > + struct vfio_resvd_region *region; > + > + list_for_each_entry(region, &iommu->reserved_iova_list, next) { > + if (region->iova < start) > + return (start - region->iova < region->size); > + else if (start < region->iova) > + return (region->iova - start < size); <= on both of the return lines? > + > + return (region->size > 0 && size > 0); > + } > + > + return false; > +} > + > +/* This function must be called with iommu->lock held */ > +static > +struct vfio_resvd_region *vfio_find_resvd_region(struct vfio_iommu *iommu, > + dma_addr_t start, size_t size) > +{ > + struct vfio_resvd_region *region; > + > + list_for_each_entry(region, &iommu->reserved_iova_list, next) > + if (region->iova == start && region->size == size) > + return region; > + > + return NULL; > +} > + > static void vfio_link_dma(struct vfio_iommu *iommu, struct vfio_dma *new) > { > struct rb_node **link = &iommu->dma_list.rb_node, *parent = NULL; > @@ -580,7 +622,8 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, > > mutex_lock(&iommu->lock); > > - if (vfio_find_dma(iommu, iova, size)) { > + if (vfio_find_dma(iommu, iova, size) || > + vfio_overlap_with_resvd_region(iommu, iova, size)) { > mutex_unlock(&iommu->lock); > return -EEXIST; > } > @@ -626,6 +669,127 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, > return ret; > } > > +/* This function must be called with iommu->lock held */ > +static > +int vfio_iommu_resvd_region_del(struct vfio_iommu *iommu, > + dma_addr_t iova, size_t size, int prot) > +{ > + struct vfio_resvd_region *res_region; Have some consistency in naming, just use "region". 
> + > + res_region = vfio_find_resvd_region(iommu, iova, size); > + /* Region should not be mapped in iommu */ > + if (res_region == NULL || res_region->map_paddr) > + return -EINVAL; Are these two separate errors? !region is -EINVAL, but being mapped is -EBUSY. > + > + list_del(&res_region->next); > + kfree(res_region); > + return 0; > +} > + > +/* This function must be called with iommu->lock held */ > +static int vfio_iommu_resvd_region_add(struct vfio_iommu *iommu, > + dma_addr_t iova, size_t size, int prot) > +{ > + struct vfio_resvd_region *res_region; > + > + /* Check overlap with with dma maping and reserved regions */ > + if (vfio_find_dma(iommu, iova, size) || > + vfio_find_resvd_region(iommu, iova, size)) > + return -EEXIST; > + > + res_region = kzalloc(sizeof(*res_region), GFP_KERNEL); > + if (res_region == NULL) > + return -ENOMEM; > + > + res_region->iova = iova; > + res_region->size = size; > + res_region->prot = prot; > + res_region->refcount = 0; > + res_region->map_paddr = 0; They're already 0 by the kzalloc > + > + list_add(&res_region->next, &iommu->reserved_iova_list); > + > + return 0; > +} > + > +static > +int vfio_handle_reserved_region_add(struct vfio_iommu *iommu, > + struct vfio_iommu_reserved_region_add *region) > +{ > + dma_addr_t iova = region->iova; > + size_t size = region->size; > + int flags = region->flags; > + uint64_t mask; > + int prot = 0; > + int ret; > + > + /* Verify that none of our __u64 fields overflow */ > + if (region->size != size || region->iova != iova) > + return -EINVAL; > + > + mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1; > + > + WARN_ON(mask & PAGE_MASK); > + > + if (flags & VFIO_IOMMU_RES_REGION_READ) > + prot |= IOMMU_READ; > + if (flags & VFIO_IOMMU_RES_REGION_WRITE) > + prot |= IOMMU_WRITE; > + > + if (!prot || !size || (size | iova) & mask) > + return -EINVAL; > + > + /* Don't allow IOVA wrap */ > + if (iova + size - 1 < iova) > + return -EINVAL; > + > + mutex_lock(&iommu->lock); > + > + if (region->flags & VFIO_IOMMU_RES_REGION_ADD) { > + ret = vfio_iommu_resvd_region_add(iommu, iova, size, prot); > + if (ret) { > + mutex_unlock(&iommu->lock); > + return ret; > + } > + } Silently fail if not VFIO_IOMMU_RES_REGION_ADD? 
> + > + mutex_unlock(&iommu->lock); > + return 0; > +} > + > +static > +int vfio_handle_reserved_region_del(struct vfio_iommu *iommu, > + struct vfio_iommu_reserved_region_del *region) > +{ > + dma_addr_t iova = region->iova; > + size_t size = region->size; > + int flags = region->flags; > + int ret; > + > + if (!(flags & VFIO_IOMMU_RES_REGION_DEL)) > + return -EINVAL; > + > + mutex_lock(&iommu->lock); > + > + /* Check for the region */ > + if (vfio_find_resvd_region(iommu, iova, size) == NULL) { > + mutex_unlock(&iommu->lock); > + return -EINVAL; > + } > + > + /* remove the reserved region */ > + if (region->flags & VFIO_IOMMU_RES_REGION_DEL) { > + ret = vfio_iommu_resvd_region_del(iommu, iova, size, flags); > + if (ret) { > + mutex_unlock(&iommu->lock); > + return ret; > + } > + } > + > + mutex_unlock(&iommu->lock); > + return 0; > +} > + > static int vfio_bus_type(struct device *dev, void *data) > { > struct bus_type **bus = data; > @@ -905,6 +1069,7 @@ static void *vfio_iommu_type1_open(unsigned long arg) > } > > INIT_LIST_HEAD(&iommu->domain_list); > + INIT_LIST_HEAD(&iommu->reserved_iova_list); > iommu->dma_list = RB_ROOT; > mutex_init(&iommu->lock); > > @@ -1020,6 +1185,40 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, > return ret; > > return copy_to_user((void __user *)arg, &unmap, minsz); > + } else if (cmd == VFIO_IOMMU_RESERVED_REGION_ADD) { > + struct vfio_iommu_reserved_region_add region; > + long ret; > + > + minsz = offsetofend(struct vfio_iommu_reserved_region_add, > + size); > + if (copy_from_user(®ion, (void __user *)arg, minsz)) > + return -EFAULT; > + > + if (region.argsz < minsz) > + return -EINVAL; > + > + ret = vfio_handle_reserved_region_add(iommu, ®ion); > + if (ret) > + return ret; > + > + return copy_to_user((void __user *)arg, ®ion, minsz); > + } else if (cmd == VFIO_IOMMU_RESERVED_REGION_DEL) { > + struct vfio_iommu_reserved_region_del region; > + long ret; > + > + minsz = offsetofend(struct vfio_iommu_reserved_region_del, > + size); > + if (copy_from_user(®ion, (void __user *)arg, minsz)) > + return -EFAULT; > + > + if (region.argsz < minsz) > + return -EINVAL; > + > + ret = vfio_handle_reserved_region_del(iommu, ®ion); > + if (ret) > + return ret; > + > + return copy_to_user((void __user *)arg, ®ion, minsz); So we've just created an interface that is available for all vfio-type1 users, whether it makes any sense for the platform or not, and it allows the user to consume arbitrary amounts of kernel memory, by making an infinitely long list of reserved iova entries, brilliant! > } > > return -ENOTTY; > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h > index b57b750..1abd1a9 100644 > --- a/include/uapi/linux/vfio.h > +++ b/include/uapi/linux/vfio.h > @@ -440,6 +440,49 @@ struct vfio_iommu_type1_dma_unmap { > #define VFIO_IOMMU_ENABLE _IO(VFIO_TYPE, VFIO_BASE + 15) > #define VFIO_IOMMU_DISABLE _IO(VFIO_TYPE, VFIO_BASE + 16) > > +/**************** Reserved IOVA region specific APIs **********************/ > + > +/* > + * VFIO_IOMMU_RESERVED_REGION_ADD - _IO(VFIO_TYPE, VFIO_BASE + 17, > + * struct vfio_iommu_reserved_region_add) > + * This is used to add a reserved iova region. > + * @flags - Input: VFIO_IOMMU_RES_REGION_ADD flag is for adding > + * a reserved region. Why else would we call VFIO_IOMMU_RESERVED_REGION_ADD except to add a region, this flag is redundant. > + * Also pass READ/WRITE/IOMMU flags to be used in iommu mapping. 
> + * @iova - Input: IOVA base address of reserved region > + * @size - Input: Size of the reserved region > + * Return: 0 on success, -errno on failure > + */ > +struct vfio_iommu_reserved_region_add { > + __u32 argsz; > + __u32 flags; > +#define VFIO_IOMMU_RES_REGION_ADD (1 << 0) /* Add a reserved region */ > +#define VFIO_IOMMU_RES_REGION_READ (1 << 1) /* readable region */ > +#define VFIO_IOMMU_RES_REGION_WRITE (1 << 2) /* writable region */ > + __u64 iova; > + __u64 size; > +}; > +#define VFIO_IOMMU_RESERVED_REGION_ADD _IO(VFIO_TYPE, VFIO_BASE + 17) > + > +/* > + * VFIO_IOMMU_RESERVED_REGION_DEL - _IO(VFIO_TYPE, VFIO_BASE + 18, > + * struct vfio_iommu_reserved_region_del) > + * This is used to delete an existing reserved iova region. > + * @flags - VFIO_IOMMU_RES_REGION_DEL flag is for deleting a region use, > + * only a unmapped region can be deleted. > + * @iova - Input: IOVA base address of reserved region > + * @size - Input: Size of the reserved region > + * Return: 0 on success, -errno on failure > + */ > +struct vfio_iommu_reserved_region_del { > + __u32 argsz; > + __u32 flags; > +#define VFIO_IOMMU_RES_REGION_DEL (1 << 0) /* unset the reserved region */ > + __u64 iova; > + __u64 size; > +}; > +#define VFIO_IOMMU_RESERVED_REGION_DEL _IO(VFIO_TYPE, VFIO_BASE + 18) > + These are effectively both struct vfio_iommu_type1_dma_unmap > /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */ > > /* -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
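For reference, Alex's closing point is that both proposed structures duplicate an existing uapi layout. The structure he names looked like this in include/uapi/linux/vfio.h at the time (copied from the kernel's uapi header, with comments abridged):

```c
/* Used with VFIO_IOMMU_UNMAP_DMA - _IO(VFIO_TYPE, VFIO_BASE + 14) */
struct vfio_iommu_type1_dma_unmap {
	__u32	argsz;
	__u32	flags;
	__u64	iova;	/* IO virtual address */
	__u64	size;	/* Size of mapping (bytes) */
};
```

Both vfio_iommu_reserved_region_add and vfio_iommu_reserved_region_del carry exactly the same argsz/flags/iova/size fields and differ only in their flag definitions.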
On Mon, 2015-10-05 at 04:55 +0000, Bhushan Bharat wrote: > Hi Alex, > > > -----Original Message----- > > From: Alex Williamson [mailto:alex.williamson@redhat.com] > > Sent: Saturday, October 03, 2015 4:16 AM > > To: Bhushan Bharat-R65777 <Bharat.Bhushan@freescale.com> > > Cc: kvmarm@lists.cs.columbia.edu; kvm@vger.kernel.org; > > christoffer.dall@linaro.org; eric.auger@linaro.org; pranavkumar@linaro.org; > > marc.zyngier@arm.com; will.deacon@arm.com > > Subject: Re: [RFC PATCH 1/6] vfio: Add interface for add/del reserved iova > > region > > > > On Wed, 2015-09-30 at 20:26 +0530, Bharat Bhushan wrote: > > > This Patch adds the VFIO APIs to add and remove reserved iova regions. > > > The reserved iova region can be used for mapping some specific > > > physical address in iommu. > > > > > > Currently we are planning to use this interface for adding iova > > > regions for creating iommu of msi-pages. But the API are designed for > > > future extension where some other physical address can be mapped. > > > > > > Signed-off-by: Bharat Bhushan <Bharat.Bhushan@freescale.com> > > > --- > > > drivers/vfio/vfio_iommu_type1.c | 201 > > +++++++++++++++++++++++++++++++++++++++- > > > include/uapi/linux/vfio.h | 43 +++++++++ > > > 2 files changed, 243 insertions(+), 1 deletion(-) > > > > > > diff --git a/drivers/vfio/vfio_iommu_type1.c > > > b/drivers/vfio/vfio_iommu_type1.c index 57d8c37..fa5d3e4 100644 > > > --- a/drivers/vfio/vfio_iommu_type1.c > > > +++ b/drivers/vfio/vfio_iommu_type1.c > > > @@ -59,6 +59,7 @@ struct vfio_iommu { > > > struct rb_root dma_list; > > > bool v2; > > > bool nesting; > > > + struct list_head reserved_iova_list; > > > > This alignment leads to poor packing in the structure, put it above the bools. > > ok > > > > > > }; > > > > > > struct vfio_domain { > > > @@ -77,6 +78,15 @@ struct vfio_dma { > > > int prot; /* IOMMU_READ/WRITE */ > > > }; > > > > > > +struct vfio_resvd_region { > > > + dma_addr_t iova; > > > + size_t size; > > > + int prot; /* IOMMU_READ/WRITE */ > > > + int refcount; /* ref count of mappings */ > > > + uint64_t map_paddr; /* Mapped Physical Address > > */ > > > > phys_addr_t > > Ok, > > > > > > + struct list_head next; > > > +}; > > > + > > > struct vfio_group { > > > struct iommu_group *iommu_group; > > > struct list_head next; > > > @@ -106,6 +116,38 @@ static struct vfio_dma *vfio_find_dma(struct > > vfio_iommu *iommu, > > > return NULL; > > > } > > > > > > +/* This function must be called with iommu->lock held */ static bool > > > +vfio_overlap_with_resvd_region(struct vfio_iommu *iommu, > > > + dma_addr_t start, size_t size) { > > > + struct vfio_resvd_region *region; > > > + > > > + list_for_each_entry(region, &iommu->reserved_iova_list, next) { > > > + if (region->iova < start) > > > + return (start - region->iova < region->size); > > > + else if (start < region->iova) > > > + return (region->iova - start < size); > > > > <= on both of the return lines? > > I think is should be "<" and not "=<", no ? Yep, looks like you're right. Maybe there's a more straightforward way to do this. 
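A minimal sketch of the more straightforward test Alex may be hinting at, using the standard half-open interval check. Note that in the posted version every path through the loop body returns, so only the first list entry is ever examined; the variant below walks the whole list. It assumes, as the ioctl handlers already guarantee, that neither range wraps and that zero-sized ranges have been rejected earlier:

```c
/* This function must be called with iommu->lock held */
static bool vfio_overlap_with_resvd_region(struct vfio_iommu *iommu,
					   dma_addr_t start, size_t size)
{
	struct vfio_resvd_region *region;

	list_for_each_entry(region, &iommu->reserved_iova_list, next) {
		/*
		 * Half-open ranges [start, start + size) and
		 * [region->iova, region->iova + region->size) intersect
		 * iff each one begins before the other ends.
		 */
		if (start < region->iova + region->size &&
		    region->iova < start + size)
			return true;
	}

	return false;
}
```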
> > > > > + > > > + return (region->size > 0 && size > 0); > > > + } > > > + > > > + return false; > > > +} > > > + > > > +/* This function must be called with iommu->lock held */ static > > > +struct vfio_resvd_region *vfio_find_resvd_region(struct vfio_iommu > > *iommu, > > > + dma_addr_t start, size_t > > size) { > > > + struct vfio_resvd_region *region; > > > + > > > + list_for_each_entry(region, &iommu->reserved_iova_list, next) > > > + if (region->iova == start && region->size == size) > > > + return region; > > > + > > > + return NULL; > > > +} > > > + > > > static void vfio_link_dma(struct vfio_iommu *iommu, struct vfio_dma > > > *new) { > > > struct rb_node **link = &iommu->dma_list.rb_node, *parent = > > NULL; @@ > > > -580,7 +622,8 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, > > > > > > mutex_lock(&iommu->lock); > > > > > > - if (vfio_find_dma(iommu, iova, size)) { > > > + if (vfio_find_dma(iommu, iova, size) || > > > + vfio_overlap_with_resvd_region(iommu, iova, size)) { > > > mutex_unlock(&iommu->lock); > > > return -EEXIST; > > > } > > > @@ -626,6 +669,127 @@ static int vfio_dma_do_map(struct vfio_iommu > > *iommu, > > > return ret; > > > } > > > > > > +/* This function must be called with iommu->lock held */ static int > > > +vfio_iommu_resvd_region_del(struct vfio_iommu *iommu, > > > + dma_addr_t iova, size_t size, int prot) { > > > + struct vfio_resvd_region *res_region; > > > > Have some consistency in naming, just use "region". > > Ok, > > > > + > > > + res_region = vfio_find_resvd_region(iommu, iova, size); > > > + /* Region should not be mapped in iommu */ > > > + if (res_region == NULL || res_region->map_paddr) > > > + return -EINVAL; > > > > Are these two separate errors? !region is -EINVAL, but being mapped is - > > EBUSY. > > Yes, will separate them. 
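A sketch of the agreed split, with a missing region reported as -EINVAL and a still-mapped one as -EBUSY. The rename to "region" and the dropped, unused prot parameter are assumptions about the respin rather than code from this series:

```c
/* This function must be called with iommu->lock held */
static int vfio_iommu_resvd_region_del(struct vfio_iommu *iommu,
				       dma_addr_t iova, size_t size)
{
	struct vfio_resvd_region *region;

	region = vfio_find_resvd_region(iommu, iova, size);
	if (region == NULL)
		return -EINVAL;		/* no such reserved region */
	if (region->map_paddr)
		return -EBUSY;		/* still mapped in the IOMMU */

	list_del(&region->next);
	kfree(region);
	return 0;
}
```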
> > > > > > + > > > + list_del(&res_region->next); > > > + kfree(res_region); > > > + return 0; > > > +} > > > + > > > +/* This function must be called with iommu->lock held */ static int > > > +vfio_iommu_resvd_region_add(struct vfio_iommu *iommu, > > > + dma_addr_t iova, size_t size, int prot) { > > > + struct vfio_resvd_region *res_region; > > > + > > > + /* Check overlap with with dma maping and reserved regions */ > > > + if (vfio_find_dma(iommu, iova, size) || > > > + vfio_find_resvd_region(iommu, iova, size)) > > > + return -EEXIST; > > > + > > > + res_region = kzalloc(sizeof(*res_region), GFP_KERNEL); > > > + if (res_region == NULL) > > > + return -ENOMEM; > > > + > > > + res_region->iova = iova; > > > + res_region->size = size; > > > + res_region->prot = prot; > > > + res_region->refcount = 0; > > > + res_region->map_paddr = 0; > > > > They're already 0 by the kzalloc > > Yes ;) > > > > > + > > > + list_add(&res_region->next, &iommu->reserved_iova_list); > > > + > > > + return 0; > > > +} > > > + > > > +static > > > +int vfio_handle_reserved_region_add(struct vfio_iommu *iommu, > > > + struct vfio_iommu_reserved_region_add > > *region) { > > > + dma_addr_t iova = region->iova; > > > + size_t size = region->size; > > > + int flags = region->flags; > > > + uint64_t mask; > > > + int prot = 0; > > > + int ret; > > > + > > > + /* Verify that none of our __u64 fields overflow */ > > > + if (region->size != size || region->iova != iova) > > > + return -EINVAL; > > > + > > > + mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1; > > > + > > > + WARN_ON(mask & PAGE_MASK); > > > + > > > + if (flags & VFIO_IOMMU_RES_REGION_READ) > > > + prot |= IOMMU_READ; > > > + if (flags & VFIO_IOMMU_RES_REGION_WRITE) > > > + prot |= IOMMU_WRITE; > > > + > > > + if (!prot || !size || (size | iova) & mask) > > > + return -EINVAL; > > > + > > > + /* Don't allow IOVA wrap */ > > > + if (iova + size - 1 < iova) > > > + return -EINVAL; > > > + > > > + mutex_lock(&iommu->lock); > > > + > > > + if (region->flags & VFIO_IOMMU_RES_REGION_ADD) { > > > + ret = vfio_iommu_resvd_region_add(iommu, iova, size, > > prot); > > > + if (ret) { > > > + mutex_unlock(&iommu->lock); > > > + return ret; > > > + } > > > + } > > > > Silently fail if not VFIO_IOMMU_RES_REGION_ADD? > > As per below comment we do not need this flag. So the above flag checking will be removed. 
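With the redundant VFIO_IOMMU_RES_REGION_ADD flag check dropped as agreed, the add handler no longer has a silent-success path. A sketch of the resulting shape (illustrative only, not the posted code):

```c
static int vfio_handle_reserved_region_add(struct vfio_iommu *iommu,
			struct vfio_iommu_reserved_region_add *region)
{
	dma_addr_t iova = region->iova;
	size_t size = region->size;
	uint64_t mask;
	int prot = 0;
	int ret;

	/* Verify that none of our __u64 fields overflow */
	if (region->size != size || region->iova != iova)
		return -EINVAL;

	mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1;
	WARN_ON(mask & PAGE_MASK);

	if (region->flags & VFIO_IOMMU_RES_REGION_READ)
		prot |= IOMMU_READ;
	if (region->flags & VFIO_IOMMU_RES_REGION_WRITE)
		prot |= IOMMU_WRITE;

	/* Reject empty, misaligned, or wrapping ranges */
	if (!prot || !size || (size | iova) & mask)
		return -EINVAL;
	if (iova + size - 1 < iova)
		return -EINVAL;

	mutex_lock(&iommu->lock);
	ret = vfio_iommu_resvd_region_add(iommu, iova, size, prot);
	mutex_unlock(&iommu->lock);

	return ret;
}
```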
> > > > > > + > > > + mutex_unlock(&iommu->lock); > > > + return 0; > > > +} > > > + > > > +static > > > +int vfio_handle_reserved_region_del(struct vfio_iommu *iommu, > > > + struct vfio_iommu_reserved_region_del > > *region) { > > > + dma_addr_t iova = region->iova; > > > + size_t size = region->size; > > > + int flags = region->flags; > > > + int ret; > > > + > > > + if (!(flags & VFIO_IOMMU_RES_REGION_DEL)) > > > + return -EINVAL; > > > + > > > + mutex_lock(&iommu->lock); > > > + > > > + /* Check for the region */ > > > + if (vfio_find_resvd_region(iommu, iova, size) == NULL) { > > > + mutex_unlock(&iommu->lock); > > > + return -EINVAL; > > > + } > > > + > > > + /* remove the reserved region */ > > > + if (region->flags & VFIO_IOMMU_RES_REGION_DEL) { > > > + ret = vfio_iommu_resvd_region_del(iommu, iova, size, > > flags); > > > + if (ret) { > > > + mutex_unlock(&iommu->lock); > > > + return ret; > > > + } > > > + } > > > + > > > + mutex_unlock(&iommu->lock); > > > + return 0; > > > +} > > > + > > > static int vfio_bus_type(struct device *dev, void *data) { > > > struct bus_type **bus = data; > > > @@ -905,6 +1069,7 @@ static void *vfio_iommu_type1_open(unsigned > > long arg) > > > } > > > > > > INIT_LIST_HEAD(&iommu->domain_list); > > > + INIT_LIST_HEAD(&iommu->reserved_iova_list); > > > iommu->dma_list = RB_ROOT; > > > mutex_init(&iommu->lock); > > > > > > @@ -1020,6 +1185,40 @@ static long vfio_iommu_type1_ioctl(void > > *iommu_data, > > > return ret; > > > > > > return copy_to_user((void __user *)arg, &unmap, minsz); > > > + } else if (cmd == VFIO_IOMMU_RESERVED_REGION_ADD) { > > > + struct vfio_iommu_reserved_region_add region; > > > + long ret; > > > + > > > + minsz = offsetofend(struct > > vfio_iommu_reserved_region_add, > > > + size); > > > + if (copy_from_user(®ion, (void __user *)arg, minsz)) > > > + return -EFAULT; > > > + > > > + if (region.argsz < minsz) > > > + return -EINVAL; > > > + > > > + ret = vfio_handle_reserved_region_add(iommu, ®ion); > > > + if (ret) > > > + return ret; > > > + > > > + return copy_to_user((void __user *)arg, ®ion, minsz); > > > + } else if (cmd == VFIO_IOMMU_RESERVED_REGION_DEL) { > > > + struct vfio_iommu_reserved_region_del region; > > > + long ret; > > > + > > > + minsz = offsetofend(struct > > vfio_iommu_reserved_region_del, > > > + size); > > > + if (copy_from_user(®ion, (void __user *)arg, minsz)) > > > + return -EFAULT; > > > + > > > + if (region.argsz < minsz) > > > + return -EINVAL; > > > + > > > + ret = vfio_handle_reserved_region_del(iommu, ®ion); > > > + if (ret) > > > + return ret; > > > + > > > + return copy_to_user((void __user *)arg, ®ion, minsz); > > > > So we've just created an interface that is available for all vfio-type1 users, > > whether it makes any sense for the platform or not, > > How we should decide that a given platform needs this or not? You later add new iommu interfaces, presumably if the iommu doesn't implement those interfaces then there's no point in us exposing these interfaces to vfio. > > and it allows the user to > > consume arbitrary amounts of kernel memory, by making an infinitely long > > list of reserved iova entries, brilliant! > > I was not sure of how to limit the user. What I was thinking of having a default number of pages a user can reserve (512 pages). Also we can give a sysfs interface so that user can change the default number of pages. Does this sound good? If not please suggest? Isn't 512 entries a lot for a linked list? 
Can we use our existing rb tree to manage these entries rather than a secondary list? How many entries do we realistically need? Can the iommu callbacks help give us a limit? Can we somehow use information about the devices in the group to produce a limit, ie. MSI vectors possible from the group? > > > > > } > > > > > > return -ENOTTY; > > > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h > > > index b57b750..1abd1a9 100644 > > > --- a/include/uapi/linux/vfio.h > > > +++ b/include/uapi/linux/vfio.h > > > @@ -440,6 +440,49 @@ struct vfio_iommu_type1_dma_unmap { > > > #define VFIO_IOMMU_ENABLE _IO(VFIO_TYPE, VFIO_BASE + 15) > > > #define VFIO_IOMMU_DISABLE _IO(VFIO_TYPE, VFIO_BASE + 16) > > > > > > +/**************** Reserved IOVA region specific APIs > > > +**********************/ > > > + > > > +/* > > > + * VFIO_IOMMU_RESERVED_REGION_ADD - _IO(VFIO_TYPE, VFIO_BASE > > + 17, > > > + * struct > > vfio_iommu_reserved_region_add) > > > + * This is used to add a reserved iova region. > > > + * @flags - Input: VFIO_IOMMU_RES_REGION_ADD flag is for adding > > > + * a reserved region. > > > > Why else would we call VFIO_IOMMU_RESERVED_REGION_ADD except to > > add a region, this flag is redundant. > > Ok, will remove this. > > > > > > + * Also pass READ/WRITE/IOMMU flags to be used in iommu mapping. > > > + * @iova - Input: IOVA base address of reserved region > > > + * @size - Input: Size of the reserved region > > > + * Return: 0 on success, -errno on failure */ struct > > > +vfio_iommu_reserved_region_add { > > > + __u32 argsz; > > > + __u32 flags; > > > +#define VFIO_IOMMU_RES_REGION_ADD (1 << 0) /* Add a reserved > > region */ > > > +#define VFIO_IOMMU_RES_REGION_READ (1 << 1) /* readable region */ > > > +#define VFIO_IOMMU_RES_REGION_WRITE (1 << 2) /* writable > > region */ > > > + __u64 iova; > > > + __u64 size; > > > +}; > > > +#define VFIO_IOMMU_RESERVED_REGION_ADD _IO(VFIO_TYPE, > > VFIO_BASE + 17) > > > + > > > +/* > > > + * VFIO_IOMMU_RESERVED_REGION_DEL - _IO(VFIO_TYPE, VFIO_BASE + > > 18, > > > + * struct > > vfio_iommu_reserved_region_del) > > > + * This is used to delete an existing reserved iova region. > > > + * @flags - VFIO_IOMMU_RES_REGION_DEL flag is for deleting a region > > > +use, > > > + * only a unmapped region can be deleted. > > > + * @iova - Input: IOVA base address of reserved region > > > + * @size - Input: Size of the reserved region > > > + * Return: 0 on success, -errno on failure */ struct > > > +vfio_iommu_reserved_region_del { > > > + __u32 argsz; > > > + __u32 flags; > > > +#define VFIO_IOMMU_RES_REGION_DEL (1 << 0) /* unset the > > reserved region */ > > > + __u64 iova; > > > + __u64 size; > > > +}; > > > +#define VFIO_IOMMU_RESERVED_REGION_DEL _IO(VFIO_TYPE, > > VFIO_BASE + 18) > > > + > > > > These are effectively both > > > > struct vfio_iommu_type1_dma_unmap > > Yes, do you want to suggest that we should use " struct vfio_iommu_type1_dma_unmap". I found that confusing. > What is we use "struct vfio_iommu_reserved_region" and use flag VFIO_IOMMU_RES_REGION_DEL/ VFIO_IOMMU_RES_REGION_ADD. What if we just use the existing map and unmap interface with a flag to indicate an MSI reserved mapping? I don't really see why we need new ioctls for this. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
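What Alex proposes could look roughly like the sketch below: keep the existing VFIO_IOMMU_MAP_DMA and VFIO_IOMMU_UNMAP_DMA ioctls and their structures, and mark MSI reserved ranges with a new flag. The MSI flag name and value are hypothetical; only the READ/WRITE flags and the layout existed in the uapi header at the time:

```c
struct vfio_iommu_type1_dma_map {
	__u32	argsz;
	__u32	flags;
#define VFIO_DMA_MAP_FLAG_READ		(1 << 0) /* readable from device */
#define VFIO_DMA_MAP_FLAG_WRITE		(1 << 1) /* writable from device */
#define VFIO_DMA_MAP_FLAG_MSI_RESERVED	(1 << 2) /* hypothetical: iova names a
						  * reserved MSI range; vaddr
						  * is ignored */
	__u64	vaddr;	/* Process virtual address */
	__u64	iova;	/* IO virtual address */
	__u64	size;	/* Size of mapping (bytes) */
};
```

An unmap of such a range would then travel the ordinary VFIO_IOMMU_UNMAP_DMA path, avoiding the two new ioctls entirely.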
> -----Original Message----- > From: Alex Williamson [mailto:alex.williamson@redhat.com] > Sent: Tuesday, October 06, 2015 4:15 AM > To: Bhushan Bharat-R65777 <Bharat.Bhushan@freescale.com> > Cc: kvmarm@lists.cs.columbia.edu; kvm@vger.kernel.org; > christoffer.dall@linaro.org; eric.auger@linaro.org; pranavkumar@linaro.org; > marc.zyngier@arm.com; will.deacon@arm.com > Subject: Re: [RFC PATCH 1/6] vfio: Add interface for add/del reserved iova > region > > On Mon, 2015-10-05 at 04:55 +0000, Bhushan Bharat wrote: > > Hi Alex, > > > > > -----Original Message----- > > > From: Alex Williamson [mailto:alex.williamson@redhat.com] > > > Sent: Saturday, October 03, 2015 4:16 AM > > > To: Bhushan Bharat-R65777 <Bharat.Bhushan@freescale.com> > > > Cc: kvmarm@lists.cs.columbia.edu; kvm@vger.kernel.org; > > > christoffer.dall@linaro.org; eric.auger@linaro.org; > > > pranavkumar@linaro.org; marc.zyngier@arm.com; will.deacon@arm.com > > > Subject: Re: [RFC PATCH 1/6] vfio: Add interface for add/del > > > reserved iova region > > > > > > On Wed, 2015-09-30 at 20:26 +0530, Bharat Bhushan wrote: > > > > This Patch adds the VFIO APIs to add and remove reserved iova regions. > > > > The reserved iova region can be used for mapping some specific > > > > physical address in iommu. > > > > > > > > Currently we are planning to use this interface for adding iova > > > > regions for creating iommu of msi-pages. But the API are designed > > > > for future extension where some other physical address can be > mapped. > > > > > > > > Signed-off-by: Bharat Bhushan <Bharat.Bhushan@freescale.com> > > > > --- > > > > drivers/vfio/vfio_iommu_type1.c | 201 > > > +++++++++++++++++++++++++++++++++++++++- > > > > include/uapi/linux/vfio.h | 43 +++++++++ > > > > 2 files changed, 243 insertions(+), 1 deletion(-) > > > > > > > > diff --git a/drivers/vfio/vfio_iommu_type1.c > > > > b/drivers/vfio/vfio_iommu_type1.c index 57d8c37..fa5d3e4 100644 > > > > --- a/drivers/vfio/vfio_iommu_type1.c > > > > +++ b/drivers/vfio/vfio_iommu_type1.c > > > > @@ -59,6 +59,7 @@ struct vfio_iommu { > > > > struct rb_root dma_list; > > > > bool v2; > > > > bool nesting; > > > > + struct list_head reserved_iova_list; > > > > > > This alignment leads to poor packing in the structure, put it above the > bools. 
> > > > ok > > > > > > > > > }; > > > > > > > > struct vfio_domain { > > > > @@ -77,6 +78,15 @@ struct vfio_dma { > > > > int prot; /* IOMMU_READ/WRITE */ > > > > }; > > > > > > > > +struct vfio_resvd_region { > > > > + dma_addr_t iova; > > > > + size_t size; > > > > + int prot; /* IOMMU_READ/WRITE */ > > > > + int refcount; /* ref count of mappings */ > > > > + uint64_t map_paddr; /* Mapped Physical Address > > > */ > > > > > > phys_addr_t > > > > Ok, > > > > > > > > > + struct list_head next; > > > > +}; > > > > + > > > > struct vfio_group { > > > > struct iommu_group *iommu_group; > > > > struct list_head next; > > > > @@ -106,6 +116,38 @@ static struct vfio_dma *vfio_find_dma(struct > > > vfio_iommu *iommu, > > > > return NULL; > > > > } > > > > > > > > +/* This function must be called with iommu->lock held */ static > > > > +bool vfio_overlap_with_resvd_region(struct vfio_iommu *iommu, > > > > + dma_addr_t start, size_t size) { > > > > + struct vfio_resvd_region *region; > > > > + > > > > + list_for_each_entry(region, &iommu->reserved_iova_list, next) { > > > > + if (region->iova < start) > > > > + return (start - region->iova < region->size); > > > > + else if (start < region->iova) > > > > + return (region->iova - start < size); > > > > > > <= on both of the return lines? > > > > I think is should be "<" and not "=<", no ? > > Yep, looks like you're right. Maybe there's a more straightforward way to do > this. > > > > > > > > + > > > > + return (region->size > 0 && size > 0); > > > > + } > > > > + > > > > + return false; > > > > +} > > > > + > > > > +/* This function must be called with iommu->lock held */ static > > > > +struct vfio_resvd_region *vfio_find_resvd_region(struct > > > > +vfio_iommu > > > *iommu, > > > > + dma_addr_t start, size_t > > > size) { > > > > + struct vfio_resvd_region *region; > > > > + > > > > + list_for_each_entry(region, &iommu->reserved_iova_list, next) > > > > + if (region->iova == start && region->size == size) > > > > + return region; > > > > + > > > > + return NULL; > > > > +} > > > > + > > > > static void vfio_link_dma(struct vfio_iommu *iommu, struct > > > > vfio_dma > > > > *new) { > > > > struct rb_node **link = &iommu->dma_list.rb_node, *parent = > > > NULL; @@ > > > > -580,7 +622,8 @@ static int vfio_dma_do_map(struct vfio_iommu > > > > *iommu, > > > > > > > > mutex_lock(&iommu->lock); > > > > > > > > - if (vfio_find_dma(iommu, iova, size)) { > > > > + if (vfio_find_dma(iommu, iova, size) || > > > > + vfio_overlap_with_resvd_region(iommu, iova, size)) { > > > > mutex_unlock(&iommu->lock); > > > > return -EEXIST; > > > > } > > > > @@ -626,6 +669,127 @@ static int vfio_dma_do_map(struct > vfio_iommu > > > *iommu, > > > > return ret; > > > > } > > > > > > > > +/* This function must be called with iommu->lock held */ static > > > > +int vfio_iommu_resvd_region_del(struct vfio_iommu *iommu, > > > > + dma_addr_t iova, size_t size, int prot) { > > > > + struct vfio_resvd_region *res_region; > > > > > > Have some consistency in naming, just use "region". > > > > Ok, > > > > > > + > > > > + res_region = vfio_find_resvd_region(iommu, iova, size); > > > > + /* Region should not be mapped in iommu */ > > > > + if (res_region == NULL || res_region->map_paddr) > > > > + return -EINVAL; > > > > > > Are these two separate errors? !region is -EINVAL, but being mapped > > > is - EBUSY. > > > > Yes, will separate them. 
> > > > > > > > > + > > > > + list_del(&res_region->next); > > > > + kfree(res_region); > > > > + return 0; > > > > +} > > > > + > > > > +/* This function must be called with iommu->lock held */ static > > > > +int vfio_iommu_resvd_region_add(struct vfio_iommu *iommu, > > > > + dma_addr_t iova, size_t size, int prot) { > > > > + struct vfio_resvd_region *res_region; > > > > + > > > > + /* Check overlap with with dma maping and reserved regions */ > > > > + if (vfio_find_dma(iommu, iova, size) || > > > > + vfio_find_resvd_region(iommu, iova, size)) > > > > + return -EEXIST; > > > > + > > > > + res_region = kzalloc(sizeof(*res_region), GFP_KERNEL); > > > > + if (res_region == NULL) > > > > + return -ENOMEM; > > > > + > > > > + res_region->iova = iova; > > > > + res_region->size = size; > > > > + res_region->prot = prot; > > > > + res_region->refcount = 0; > > > > + res_region->map_paddr = 0; > > > > > > They're already 0 by the kzalloc > > > > Yes ;) > > > > > > > + > > > > + list_add(&res_region->next, &iommu->reserved_iova_list); > > > > + > > > > + return 0; > > > > +} > > > > + > > > > +static > > > > +int vfio_handle_reserved_region_add(struct vfio_iommu *iommu, > > > > + struct vfio_iommu_reserved_region_add > > > *region) { > > > > + dma_addr_t iova = region->iova; > > > > + size_t size = region->size; > > > > + int flags = region->flags; > > > > + uint64_t mask; > > > > + int prot = 0; > > > > + int ret; > > > > + > > > > + /* Verify that none of our __u64 fields overflow */ > > > > + if (region->size != size || region->iova != iova) > > > > + return -EINVAL; > > > > + > > > > + mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1; > > > > + > > > > + WARN_ON(mask & PAGE_MASK); > > > > + > > > > + if (flags & VFIO_IOMMU_RES_REGION_READ) > > > > + prot |= IOMMU_READ; > > > > + if (flags & VFIO_IOMMU_RES_REGION_WRITE) > > > > + prot |= IOMMU_WRITE; > > > > + > > > > + if (!prot || !size || (size | iova) & mask) > > > > + return -EINVAL; > > > > + > > > > + /* Don't allow IOVA wrap */ > > > > + if (iova + size - 1 < iova) > > > > + return -EINVAL; > > > > + > > > > + mutex_lock(&iommu->lock); > > > > + > > > > + if (region->flags & VFIO_IOMMU_RES_REGION_ADD) { > > > > + ret = vfio_iommu_resvd_region_add(iommu, iova, size, > > > prot); > > > > + if (ret) { > > > > + mutex_unlock(&iommu->lock); > > > > + return ret; > > > > + } > > > > + } > > > > > > Silently fail if not VFIO_IOMMU_RES_REGION_ADD? > > > > As per below comment we do not need this flag. So the above flag > checking will be removed. 
> > > > > > > > > + > > > > + mutex_unlock(&iommu->lock); > > > > + return 0; > > > > +} > > > > + > > > > +static > > > > +int vfio_handle_reserved_region_del(struct vfio_iommu *iommu, > > > > + struct vfio_iommu_reserved_region_del > > > *region) { > > > > + dma_addr_t iova = region->iova; > > > > + size_t size = region->size; > > > > + int flags = region->flags; > > > > + int ret; > > > > + > > > > + if (!(flags & VFIO_IOMMU_RES_REGION_DEL)) > > > > + return -EINVAL; > > > > + > > > > + mutex_lock(&iommu->lock); > > > > + > > > > + /* Check for the region */ > > > > + if (vfio_find_resvd_region(iommu, iova, size) == NULL) { > > > > + mutex_unlock(&iommu->lock); > > > > + return -EINVAL; > > > > + } > > > > + > > > > + /* remove the reserved region */ > > > > + if (region->flags & VFIO_IOMMU_RES_REGION_DEL) { > > > > + ret = vfio_iommu_resvd_region_del(iommu, iova, size, > > > flags); > > > > + if (ret) { > > > > + mutex_unlock(&iommu->lock); > > > > + return ret; > > > > + } > > > > + } > > > > + > > > > + mutex_unlock(&iommu->lock); > > > > + return 0; > > > > +} > > > > + > > > > static int vfio_bus_type(struct device *dev, void *data) { > > > > struct bus_type **bus = data; > > > > @@ -905,6 +1069,7 @@ static void > *vfio_iommu_type1_open(unsigned > > > long arg) > > > > } > > > > > > > > INIT_LIST_HEAD(&iommu->domain_list); > > > > + INIT_LIST_HEAD(&iommu->reserved_iova_list); > > > > iommu->dma_list = RB_ROOT; > > > > mutex_init(&iommu->lock); > > > > > > > > @@ -1020,6 +1185,40 @@ static long vfio_iommu_type1_ioctl(void > > > *iommu_data, > > > > return ret; > > > > > > > > return copy_to_user((void __user *)arg, &unmap, minsz); > > > > + } else if (cmd == VFIO_IOMMU_RESERVED_REGION_ADD) { > > > > + struct vfio_iommu_reserved_region_add region; > > > > + long ret; > > > > + > > > > + minsz = offsetofend(struct > > > vfio_iommu_reserved_region_add, > > > > + size); > > > > + if (copy_from_user(®ion, (void __user *)arg, minsz)) > > > > + return -EFAULT; > > > > + > > > > + if (region.argsz < minsz) > > > > + return -EINVAL; > > > > + > > > > + ret = vfio_handle_reserved_region_add(iommu, ®ion); > > > > + if (ret) > > > > + return ret; > > > > + > > > > + return copy_to_user((void __user *)arg, ®ion, minsz); > > > > + } else if (cmd == VFIO_IOMMU_RESERVED_REGION_DEL) { > > > > + struct vfio_iommu_reserved_region_del region; > > > > + long ret; > > > > + > > > > + minsz = offsetofend(struct > > > vfio_iommu_reserved_region_del, > > > > + size); > > > > + if (copy_from_user(®ion, (void __user *)arg, minsz)) > > > > + return -EFAULT; > > > > + > > > > + if (region.argsz < minsz) > > > > + return -EINVAL; > > > > + > > > > + ret = vfio_handle_reserved_region_del(iommu, ®ion); > > > > + if (ret) > > > > + return ret; > > > > + > > > > + return copy_to_user((void __user *)arg, ®ion, minsz); > > > > > > So we've just created an interface that is available for all > > > vfio-type1 users, whether it makes any sense for the platform or > > > not, > > > > How we should decide that a given platform needs this or not? > > You later add new iommu interfaces, presumably if the iommu doesn't > implement those interfaces then there's no point in us exposing these > interfaces to vfio. You mean if an iommu says "does not requires explicit-iommu-mapping MSIs" then if user-space calls these reserved-iova ioctls then we should return error without adding these regions, right? 
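The gating Bharat describes might look like the sketch below. The capability query is entirely hypothetical, since no such interface existed at the time of this thread, but it illustrates failing the reserved-iova ioctls on platforms whose IOMMU does not require explicitly mapped MSI pages:

```c
/*
 * Hypothetical helper: ask the IOMMU layer whether MSI writes on this
 * platform are translated by the IOMMU and therefore need a
 * user-reserved IOVA range.
 */
extern bool iommu_msi_needs_mapping(struct iommu_domain *domain);

/* This function must be called with iommu->lock held */
static int vfio_check_msi_mapping_needed(struct vfio_iommu *iommu)
{
	struct vfio_domain *domain;

	list_for_each_entry(domain, &iommu->domain_list, next)
		if (iommu_msi_needs_mapping(domain->domain))
			return 0;

	return -ENODEV;	/* reserved-iova ioctls make no sense here */
}
```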
> > > and it allows the user to > > > consume arbitrary amounts of kernel memory, by making an infinitely > > > long list of reserved iova entries, brilliant! > > > > I was not sure of how to limit the user. What I was thinking of having a > default number of pages a user can reserve (512 pages). Also we can give a > sysfs interface so that user can change the default number of pages. Does > this sound good? If not please suggest? > > Isn't 512 entries a lot for a linked list? > Can we use our existing rb tree to manage these entries rather than a secondary list? I do not think so; it would complicate the code. > How many entries do we realistically need? I think this should be a small number of entries, as discussed on the other patch. A small number means 1 or 2 at most on PowerPC and SMMU (I think so). > Can the iommu callbacks help give us a limit? I am not sure right now how deterministically we can derive the number of regions/MSI pages needed for a given platform and for a given list of devices in a group. > Can we somehow use information about the devices in the group to produce a limit, > ie. MSI vectors possible from the group? It seems we need to know the number of vectors needed per device in the group, and how many vectors can be supported by one reserved MSI page; sharing an MSI page makes it a little more complicated. But overall it seems we need 2-3 or some other very small number of regions, so I would suggest we go with a small limit now and enhance it later if required. > > > > > > > > } > > > > > > > > return -ENOTTY; > > > > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h > > > > index b57b750..1abd1a9 100644 > > > > --- a/include/uapi/linux/vfio.h > > > > +++ b/include/uapi/linux/vfio.h > > > > @@ -440,6 +440,49 @@ struct vfio_iommu_type1_dma_unmap { > > > > #define VFIO_IOMMU_ENABLE _IO(VFIO_TYPE, VFIO_BASE + 15) > > > > #define VFIO_IOMMU_DISABLE _IO(VFIO_TYPE, VFIO_BASE + 16) > > > > > > > > +/**************** Reserved IOVA region specific APIs > > > > +**********************/ > > > > + > > > > +/* > > > > + * VFIO_IOMMU_RESERVED_REGION_ADD - _IO(VFIO_TYPE, > VFIO_BASE > > > + 17, > > > > + * struct > > > vfio_iommu_reserved_region_add) > > > > + * This is used to add a reserved iova region. > > > > + * @flags - Input: VFIO_IOMMU_RES_REGION_ADD flag is for adding > > > > + * a reserved region. > > > > > > Why else would we call VFIO_IOMMU_RESERVED_REGION_ADD except > to add > > > a region, this flag is redundant. > > > > Ok, will remove this. > > > > > > > > > + * Also pass READ/WRITE/IOMMU flags to be used in iommu mapping. > > > > + * @iova - Input: IOVA base address of reserved region > > > > + * @size - Input: Size of the reserved region > > > > + * Return: 0 on success, -errno on failure */ struct > > > > +vfio_iommu_reserved_region_add { > > > > + __u32 argsz; > > > > + __u32 flags; > > > > +#define VFIO_IOMMU_RES_REGION_ADD (1 << 0) /* Add a > reserved > > > region */ > > > > +#define VFIO_IOMMU_RES_REGION_READ (1 << 1) /* readable > region */ > > > > +#define VFIO_IOMMU_RES_REGION_WRITE (1 << 2) /* writable > > > region */ > > > > + __u64 iova; > > > > + __u64 size; > > > > +}; > > > > +#define VFIO_IOMMU_RESERVED_REGION_ADD _IO(VFIO_TYPE, > > > VFIO_BASE + 17) > > > > + > > > > +/* > > > > + * VFIO_IOMMU_RESERVED_REGION_DEL - _IO(VFIO_TYPE, > VFIO_BASE + > > > 18, > > > > + * struct > > > vfio_iommu_reserved_region_del) > > > > + * This is used to delete an existing reserved iova region.
> > > > + * @flags - VFIO_IOMMU_RES_REGION_DEL flag is for deleting a region,
> > > > + *          only an unmapped region can be deleted.
> > > > + * @iova - Input: IOVA base address of reserved region
> > > > + * @size - Input: Size of the reserved region
> > > > + * Return: 0 on success, -errno on failure
> > > > + */
> > > > +struct vfio_iommu_reserved_region_del {
> > > > +	__u32	argsz;
> > > > +	__u32	flags;
> > > > +#define VFIO_IOMMU_RES_REGION_DEL	(1 << 0) /* unset the reserved region */
> > > > +	__u64	iova;
> > > > +	__u64	size;
> > > > +};
> > > > +#define VFIO_IOMMU_RESERVED_REGION_DEL	_IO(VFIO_TYPE, VFIO_BASE + 18)
> > > > +
> > > 
> > > These are effectively both
> > > 
> > > struct vfio_iommu_type1_dma_unmap
> > 
> > Yes, do you want to suggest that we should use "struct
> > vfio_iommu_type1_dma_unmap"? I found that confusing.
> > What if we use "struct vfio_iommu_reserved_region" and use the flags
> > VFIO_IOMMU_RES_REGION_DEL/VFIO_IOMMU_RES_REGION_ADD?
> 
> What if we just use the existing map and unmap interface with a flag to
> indicate an MSI reserved mapping?  I don't really see why we need new
> ioctls for this.

The existing map/unmap APIs use a virtual address; user space does mmap()
and provides the virtual address as well. I think in a previous discussion
you commented that we should keep them separate: overriding complicates
map/unmap and can be a cause of confusion, and keeping these separate also
gives space to extend the reserved iova APIs for something else in future
(I do not know for what right now). I personally think it makes sense to
keep them separate; let me know if you think otherwise.

Thanks
-Bharat
On Tue, 2015-10-06 at 09:39 +0000, Bhushan Bharat wrote:
> 
> > -----Original Message-----
> > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > Sent: Tuesday, October 06, 2015 4:15 AM
> > To: Bhushan Bharat-R65777 <Bharat.Bhushan@freescale.com>
> > Cc: kvmarm@lists.cs.columbia.edu; kvm@vger.kernel.org;
> > christoffer.dall@linaro.org; eric.auger@linaro.org;
> > pranavkumar@linaro.org; marc.zyngier@arm.com; will.deacon@arm.com
> > Subject: Re: [RFC PATCH 1/6] vfio: Add interface for add/del reserved
> > iova region
> > 
> > On Mon, 2015-10-05 at 04:55 +0000, Bhushan Bharat wrote:
> > > Hi Alex,
> > > 
> > > > -----Original Message-----
> > > > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > > > Sent: Saturday, October 03, 2015 4:16 AM
> > > > To: Bhushan Bharat-R65777 <Bharat.Bhushan@freescale.com>
> > > > Cc: kvmarm@lists.cs.columbia.edu; kvm@vger.kernel.org;
> > > > christoffer.dall@linaro.org; eric.auger@linaro.org;
> > > > pranavkumar@linaro.org; marc.zyngier@arm.com; will.deacon@arm.com
> > > > Subject: Re: [RFC PATCH 1/6] vfio: Add interface for add/del
> > > > reserved iova region
> > > > 
> > > > On Wed, 2015-09-30 at 20:26 +0530, Bharat Bhushan wrote:
> > > > > This Patch adds the VFIO APIs to add and remove reserved iova
> > > > > regions. The reserved iova region can be used for mapping some
> > > > > specific physical address in iommu.
> > > > > 
> > > > > Currently we are planning to use this interface for adding iova
> > > > > regions for creating iommu of msi-pages. But the API are designed
> > > > > for future extension where some other physical address can be
> > > > > mapped.
> > > > > 
> > > > > Signed-off-by: Bharat Bhushan <Bharat.Bhushan@freescale.com>
> > > > > ---
> > > > >  drivers/vfio/vfio_iommu_type1.c | 201 +++++++++++++++++++++++++++++++++++++++-
> > > > >  include/uapi/linux/vfio.h       |  43 +++++++++
> > > > >  2 files changed, 243 insertions(+), 1 deletion(-)
> > > > > 
> > > > > diff --git a/drivers/vfio/vfio_iommu_type1.c
> > > > > b/drivers/vfio/vfio_iommu_type1.c index 57d8c37..fa5d3e4 100644
> > > > > --- a/drivers/vfio/vfio_iommu_type1.c
> > > > > +++ b/drivers/vfio/vfio_iommu_type1.c
> > > > > @@ -59,6 +59,7 @@ struct vfio_iommu {
> > > > >  	struct rb_root		dma_list;
> > > > >  	bool			v2;
> > > > >  	bool			nesting;
> > > > > +	struct list_head	reserved_iova_list;
> > > > 
> > > > This alignment leads to poor packing in the structure, put it above
> > > > the bools.
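[Concretely, the packing fix being asked for is just a matter of keeping
the pointer-sized member away from the bools. A sketch; the members other
than the new list_head follow the mainline struct of that era, so their
exact order here is an assumption:

struct vfio_iommu {
	struct list_head	domain_list;
	struct list_head	reserved_iova_list;	/* pointer-sized, packs cleanly here */
	struct mutex		lock;
	struct rb_root		dma_list;
	bool			v2;
	bool			nesting;
};

Placed after the bools, the list_head forces the compiler to pad the two
bools out to pointer alignment.]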
> > > 
> > > ok
> > > 
> > > > >  };
> > > > > 
> > > > >  struct vfio_domain {
> > > > > @@ -77,6 +78,15 @@ struct vfio_dma {
> > > > >  	int			prot;		/* IOMMU_READ/WRITE */
> > > > >  };
> > > > > 
> > > > > +struct vfio_resvd_region {
> > > > > +	dma_addr_t	iova;
> > > > > +	size_t		size;
> > > > > +	int		prot;		/* IOMMU_READ/WRITE */
> > > > > +	int		refcount;	/* ref count of mappings */
> > > > > +	uint64_t	map_paddr;	/* Mapped Physical Address */
> > > > 
> > > > phys_addr_t
> > > 
> > > Ok,
> > > 
> > > > > +	struct list_head	next;
> > > > > +};
> > > > > +
> > > > >  struct vfio_group {
> > > > >  	struct iommu_group	*iommu_group;
> > > > >  	struct list_head	next;
> > > > > @@ -106,6 +116,38 @@ static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu,
> > > > >  	return NULL;
> > > > >  }
> > > > > 
> > > > > +/* This function must be called with iommu->lock held */
> > > > > +static bool vfio_overlap_with_resvd_region(struct vfio_iommu *iommu,
> > > > > +					   dma_addr_t start, size_t size)
> > > > > +{
> > > > > +	struct vfio_resvd_region *region;
> > > > > +
> > > > > +	list_for_each_entry(region, &iommu->reserved_iova_list, next) {
> > > > > +		if (region->iova < start)
> > > > > +			return (start - region->iova < region->size);
> > > > > +		else if (start < region->iova)
> > > > > +			return (region->iova - start < size);
> > > > 
> > > > <= on both of the return lines?
> > > 
> > > I think it should be "<" and not "<=", no?
> > 
> > Yep, looks like you're right.  Maybe there's a more straightforward way
> > to do this.
> > 
> > > > > +
> > > > > +		return (region->size > 0 && size > 0);
> > > > > +	}
> > > > > +
> > > > > +	return false;
> > > > > +}
> > > > > +
> > > > > +/* This function must be called with iommu->lock held */
> > > > > +static
> > > > > +struct vfio_resvd_region *vfio_find_resvd_region(struct vfio_iommu *iommu,
> > > > > +						 dma_addr_t start, size_t size)
> > > > > +{
> > > > > +	struct vfio_resvd_region *region;
> > > > > +
> > > > > +	list_for_each_entry(region, &iommu->reserved_iova_list, next)
> > > > > +		if (region->iova == start && region->size == size)
> > > > > +			return region;
> > > > > +
> > > > > +	return NULL;
> > > > > +}
> > > > > +
> > > > >  static void vfio_link_dma(struct vfio_iommu *iommu, struct vfio_dma *new)
> > > > >  {
> > > > >  	struct rb_node **link = &iommu->dma_list.rb_node, *parent = NULL;
> > > > > @@ -580,7 +622,8 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
> > > > > 
> > > > >  	mutex_lock(&iommu->lock);
> > > > > 
> > > > > -	if (vfio_find_dma(iommu, iova, size)) {
> > > > > +	if (vfio_find_dma(iommu, iova, size) ||
> > > > > +	    vfio_overlap_with_resvd_region(iommu, iova, size)) {
> > > > >  		mutex_unlock(&iommu->lock);
> > > > >  		return -EEXIST;
> > > > >  	}
> > > > > @@ -626,6 +669,127 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
> > > > >  	return ret;
> > > > >  }
> > > > > 
> > > > > +/* This function must be called with iommu->lock held */
> > > > > +static
> > > > > +int vfio_iommu_resvd_region_del(struct vfio_iommu *iommu,
> > > > > +				dma_addr_t iova, size_t size, int prot)
> > > > > +{
> > > > > +	struct vfio_resvd_region *res_region;
> > > > 
> > > > Have some consistency in naming, just use "region".
> > > 
> > > Ok,
> > > 
> > > > > +
> > > > > +	res_region = vfio_find_resvd_region(iommu, iova, size);
> > > > > +	/* Region should not be mapped in iommu */
> > > > > +	if (res_region == NULL || res_region->map_paddr)
> > > > > +		return -EINVAL;
> > > > 
> > > > Are these two separate errors?
> > > > !region is -EINVAL, but being mapped is -EBUSY.
> > > 
> > > Yes, will separate them.
> > > 
> > > > > +
> > > > > +	list_del(&res_region->next);
> > > > > +	kfree(res_region);
> > > > > +	return 0;
> > > > > +}
> > > > > +
> > > > > +/* This function must be called with iommu->lock held */
> > > > > +static int vfio_iommu_resvd_region_add(struct vfio_iommu *iommu,
> > > > > +				       dma_addr_t iova, size_t size, int prot)
> > > > > +{
> > > > > +	struct vfio_resvd_region *res_region;
> > > > > +
> > > > > +	/* Check for overlap with dma mappings and reserved regions */
> > > > > +	if (vfio_find_dma(iommu, iova, size) ||
> > > > > +	    vfio_find_resvd_region(iommu, iova, size))
> > > > > +		return -EEXIST;
> > > > > +
> > > > > +	res_region = kzalloc(sizeof(*res_region), GFP_KERNEL);
> > > > > +	if (res_region == NULL)
> > > > > +		return -ENOMEM;
> > > > > +
> > > > > +	res_region->iova = iova;
> > > > > +	res_region->size = size;
> > > > > +	res_region->prot = prot;
> > > > > +	res_region->refcount = 0;
> > > > > +	res_region->map_paddr = 0;
> > > > 
> > > > They're already 0 by the kzalloc
> > > 
> > > Yes ;)
> > > 
> > > > > +
> > > > > +	list_add(&res_region->next, &iommu->reserved_iova_list);
> > > > > +
> > > > > +	return 0;
> > > > > +}
> > > > > +
> > > > > +static
> > > > > +int vfio_handle_reserved_region_add(struct vfio_iommu *iommu,
> > > > > +			struct vfio_iommu_reserved_region_add *region)
> > > > > +{
> > > > > +	dma_addr_t iova = region->iova;
> > > > > +	size_t size = region->size;
> > > > > +	int flags = region->flags;
> > > > > +	uint64_t mask;
> > > > > +	int prot = 0;
> > > > > +	int ret;
> > > > > +
> > > > > +	/* Verify that none of our __u64 fields overflow */
> > > > > +	if (region->size != size || region->iova != iova)
> > > > > +		return -EINVAL;
> > > > > +
> > > > > +	mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1;
> > > > > +
> > > > > +	WARN_ON(mask & PAGE_MASK);
> > > > > +
> > > > > +	if (flags & VFIO_IOMMU_RES_REGION_READ)
> > > > > +		prot |= IOMMU_READ;
> > > > > +	if (flags & VFIO_IOMMU_RES_REGION_WRITE)
> > > > > +		prot |= IOMMU_WRITE;
> > > > > +
> > > > > +	if (!prot || !size || (size | iova) & mask)
> > > > > +		return -EINVAL;
> > > > > +
> > > > > +	/* Don't allow IOVA wrap */
> > > > > +	if (iova + size - 1 < iova)
> > > > > +		return -EINVAL;
> > > > > +
> > > > > +	mutex_lock(&iommu->lock);
> > > > > +
> > > > > +	if (region->flags & VFIO_IOMMU_RES_REGION_ADD) {
> > > > > +		ret = vfio_iommu_resvd_region_add(iommu, iova, size, prot);
> > > > > +		if (ret) {
> > > > > +			mutex_unlock(&iommu->lock);
> > > > > +			return ret;
> > > > > +		}
> > > > > +	}
> > > > 
> > > > Silently fail if not VFIO_IOMMU_RES_REGION_ADD?
> > > 
> > > As per the below comment we do not need this flag, so the above flag
> > > check will be removed.
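[On the "more straightforward way" mentioned earlier in the thread: a
conventional half-open interval test sidesteps the <-vs-<= question, and it
also walks the whole list instead of deciding everything on the first
entry. A hedged sketch, with the same locking assumption as the original
and assuming callers have already rejected zero-size and wrapping ranges:

/* This function must be called with iommu->lock held */
static bool vfio_overlap_with_resvd_region(struct vfio_iommu *iommu,
					   dma_addr_t start, size_t size)
{
	struct vfio_resvd_region *region;

	list_for_each_entry(region, &iommu->reserved_iova_list, next) {
		/*
		 * [a, a+as) and [b, b+bs) overlap iff each range
		 * starts before the other one ends.
		 */
		if (start < region->iova + region->size &&
		    region->iova < start + size)
			return true;
	}

	return false;
}
]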
> > > > 
> > > > > +
> > > > > +	mutex_unlock(&iommu->lock);
> > > > > +	return 0;
> > > > > +}
> > > > > +
> > > > > +static
> > > > > +int vfio_handle_reserved_region_del(struct vfio_iommu *iommu,
> > > > > +			struct vfio_iommu_reserved_region_del *region)
> > > > > +{
> > > > > +	dma_addr_t iova = region->iova;
> > > > > +	size_t size = region->size;
> > > > > +	int flags = region->flags;
> > > > > +	int ret;
> > > > > +
> > > > > +	if (!(flags & VFIO_IOMMU_RES_REGION_DEL))
> > > > > +		return -EINVAL;
> > > > > +
> > > > > +	mutex_lock(&iommu->lock);
> > > > > +
> > > > > +	/* Check for the region */
> > > > > +	if (vfio_find_resvd_region(iommu, iova, size) == NULL) {
> > > > > +		mutex_unlock(&iommu->lock);
> > > > > +		return -EINVAL;
> > > > > +	}
> > > > > +
> > > > > +	/* remove the reserved region */
> > > > > +	if (region->flags & VFIO_IOMMU_RES_REGION_DEL) {
> > > > > +		ret = vfio_iommu_resvd_region_del(iommu, iova, size, flags);
> > > > > +		if (ret) {
> > > > > +			mutex_unlock(&iommu->lock);
> > > > > +			return ret;
> > > > > +		}
> > > > > +	}
> > > > > +
> > > > > +	mutex_unlock(&iommu->lock);
> > > > > +	return 0;
> > > > > +}
> > > > > +
> > > > >  static int vfio_bus_type(struct device *dev, void *data)
> > > > >  {
> > > > >  	struct bus_type **bus = data;
> > > > > @@ -905,6 +1069,7 @@ static void *vfio_iommu_type1_open(unsigned long arg)
> > > > >  	}
> > > > > 
> > > > >  	INIT_LIST_HEAD(&iommu->domain_list);
> > > > > +	INIT_LIST_HEAD(&iommu->reserved_iova_list);
> > > > >  	iommu->dma_list = RB_ROOT;
> > > > >  	mutex_init(&iommu->lock);
> > > > > 
> > > > > @@ -1020,6 +1185,40 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
> > > > >  			return ret;
> > > > > 
> > > > >  		return copy_to_user((void __user *)arg, &unmap, minsz);
> > > > > +	} else if (cmd == VFIO_IOMMU_RESERVED_REGION_ADD) {
> > > > > +		struct vfio_iommu_reserved_region_add region;
> > > > > +		long ret;
> > > > > +
> > > > > +		minsz = offsetofend(struct vfio_iommu_reserved_region_add,
> > > > > +				    size);
> > > > > +		if (copy_from_user(&region, (void __user *)arg, minsz))
> > > > > +			return -EFAULT;
> > > > > +
> > > > > +		if (region.argsz < minsz)
> > > > > +			return -EINVAL;
> > > > > +
> > > > > +		ret = vfio_handle_reserved_region_add(iommu, &region);
> > > > > +		if (ret)
> > > > > +			return ret;
> > > > > +
> > > > > +		return copy_to_user((void __user *)arg, &region, minsz);
> > > > > +	} else if (cmd == VFIO_IOMMU_RESERVED_REGION_DEL) {
> > > > > +		struct vfio_iommu_reserved_region_del region;
> > > > > +		long ret;
> > > > > +
> > > > > +		minsz = offsetofend(struct vfio_iommu_reserved_region_del,
> > > > > +				    size);
> > > > > +		if (copy_from_user(&region, (void __user *)arg, minsz))
> > > > > +			return -EFAULT;
> > > > > +
> > > > > +		if (region.argsz < minsz)
> > > > > +			return -EINVAL;
> > > > > +
> > > > > +		ret = vfio_handle_reserved_region_del(iommu, &region);
> > > > > +		if (ret)
> > > > > +			return ret;
> > > > > +
> > > > > +		return copy_to_user((void __user *)arg, &region, minsz);
> > > > 
> > > > So we've just created an interface that is available for all
> > > > vfio-type1 users, whether it makes any sense for the platform or
> > > > not,
> > > 
> > > How should we decide whether a given platform needs this or not?
> > 
> > You later add new iommu interfaces, presumably if the iommu doesn't
> > implement those interfaces then there's no point in us exposing these
> > interfaces to vfio.
> 
> You mean that if an iommu says "does not require explicit iommu mapping of
> MSIs" and user space still calls these reserved-iova ioctls, we should
> return an error without adding the regions, right?

Yes, what would it mean for the user to add reserved regions if we have no
means to use them?

> > > > and it allows the user to
> > > > consume arbitrary amounts of kernel memory, by making an infinitely
> > > > long list of reserved iova entries, brilliant!
> > > 
> > > I was not sure of how to limit the user. What I was thinking of is
> > > having a default number of pages a user can reserve (512 pages). Also
> > > we can give a sysfs interface so that the user can change the default
> > > number of pages. Does this sound good? If not, please suggest.
> > 
> > Isn't 512 entries a lot for a linked list?
> > Can we use our existing rb tree to manage these entries rather than a
> > secondary list?
> 
> I do not think so, it will complicate the code.

That's not a good answer.  Is a flag on the rb entry insufficient?  Why is
that more complicated than a completely separate list?  We can never have
reserved mappings and dma mappings occupy the same IOVA space and the rb
tree is tracking iova space, so it seems like a natural place to track
both dma and reserved IOVA.

> > How many entries do we realistically need?
> 
> I think this should be a small number of entries, as discussed on the
> other patch. A small number means 1 or 2 max on PowerPC and smmu (I think
> so).
> 
> > Can the iommu callbacks help give us a limit?
> 
> I am not sure right now how deterministically we can get the number of
> regions/msi-pages needed for a given platform and for a list of devices
> in a group.
> 
> > Can we somehow use information about the devices in the group to
> > produce a limit, ie. MSI vectors possible from the group?
> 
> It seems like we need to know the number of vectors needed per device in
> the group, and we need to know how many vectors can be supported using
> the reserved msi-page. If we are sharing a msi-page then it becomes a
> little more complicated. But overall it seems like we need 2-3 or a very
> small number of regions, so I will suggest we go with this small number
> and enhance it later if required.
> 
> > > > > 
> > > > > >  	}
> > > > > > 
> > > > > >  	return -ENOTTY;
> > > > > > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> > > > > > index b57b750..1abd1a9 100644
> > > > > > --- a/include/uapi/linux/vfio.h
> > > > > > +++ b/include/uapi/linux/vfio.h
> > > > > > @@ -440,6 +440,49 @@ struct vfio_iommu_type1_dma_unmap {
> > > > > >  #define VFIO_IOMMU_ENABLE	_IO(VFIO_TYPE, VFIO_BASE + 15)
> > > > > >  #define VFIO_IOMMU_DISABLE	_IO(VFIO_TYPE, VFIO_BASE + 16)
> > > > > > 
> > > > > > +/**************** Reserved IOVA region specific APIs **********************/
> > > > > > +
> > > > > > +/*
> > > > > > + * VFIO_IOMMU_RESERVED_REGION_ADD - _IO(VFIO_TYPE, VFIO_BASE + 17,
> > > > > > + *				      struct vfio_iommu_reserved_region_add)
> > > > > > + * This is used to add a reserved iova region.
> > > > > > + * @flags - Input: VFIO_IOMMU_RES_REGION_ADD flag is for adding
> > > > > > + *          a reserved region.
> > > > > 
> > > > > Why else would we call VFIO_IOMMU_RESERVED_REGION_ADD except to add
> > > > > a region, this flag is redundant.
> > > > 
> > > > Ok, will remove this.
> > > > 
> > > > > > + * Also pass READ/WRITE/IOMMU flags to be used in iommu mapping.
> > > > > > + * @iova - Input: IOVA base address of reserved region
> > > > > > + * @size - Input: Size of the reserved region
> > > > > > + * Return: 0 on success, -errno on failure
> > > > > > + */
> > > > > > +struct vfio_iommu_reserved_region_add {
> > > > > > +	__u32	argsz;
> > > > > > +	__u32	flags;
> > > > > > +#define VFIO_IOMMU_RES_REGION_ADD	(1 << 0) /* Add a reserved region */
> > > > > > +#define VFIO_IOMMU_RES_REGION_READ	(1 << 1) /* readable region */
> > > > > > +#define VFIO_IOMMU_RES_REGION_WRITE	(1 << 2) /* writable region */
> > > > > > +	__u64	iova;
> > > > > > +	__u64	size;
> > > > > > +};
> > > > > > +#define VFIO_IOMMU_RESERVED_REGION_ADD	_IO(VFIO_TYPE, VFIO_BASE + 17)
> > > > > > +
> > > > > > +/*
> > > > > > + * VFIO_IOMMU_RESERVED_REGION_DEL - _IO(VFIO_TYPE, VFIO_BASE + 18,
> > > > > > + *				      struct vfio_iommu_reserved_region_del)
> > > > > > + * This is used to delete an existing reserved iova region.
> > > > > > + * @flags - VFIO_IOMMU_RES_REGION_DEL flag is for deleting a region,
> > > > > > + *          only an unmapped region can be deleted.
> > > > > > + * @iova - Input: IOVA base address of reserved region
> > > > > > + * @size - Input: Size of the reserved region
> > > > > > + * Return: 0 on success, -errno on failure
> > > > > > + */
> > > > > > +struct vfio_iommu_reserved_region_del {
> > > > > > +	__u32	argsz;
> > > > > > +	__u32	flags;
> > > > > > +#define VFIO_IOMMU_RES_REGION_DEL	(1 << 0) /* unset the reserved region */
> > > > > > +	__u64	iova;
> > > > > > +	__u64	size;
> > > > > > +};
> > > > > > +#define VFIO_IOMMU_RESERVED_REGION_DEL	_IO(VFIO_TYPE, VFIO_BASE + 18)
> > > > > > +
> > > > > 
> > > > > These are effectively both
> > > > > 
> > > > > struct vfio_iommu_type1_dma_unmap
> > > > 
> > > > Yes, do you want to suggest that we should use "struct
> > > > vfio_iommu_type1_dma_unmap"? I found that confusing.
> > > > What if we use "struct vfio_iommu_reserved_region" and use the flags
> > > > VFIO_IOMMU_RES_REGION_DEL/VFIO_IOMMU_RES_REGION_ADD?
> > > 
> > > What if we just use the existing map and unmap interface with a flag to
> > > indicate an MSI reserved mapping?  I don't really see why we need new
> > > ioctls for this.
> 
> The existing map/unmap APIs use a virtual address; user space does mmap()
> and provides the virtual address as well. I think in a previous discussion
> you commented that we should keep them separate: overriding complicates
> map/unmap and can be a cause of confusion, and keeping these separate also
> gives space to extend the reserved iova APIs for something else in future
> (I do not know for what right now). I personally think it makes sense to
> keep them separate; let me know if you think otherwise.

It seems pretty simple for a userspace app to know that the virtual address
isn't used when doing a mapping with MSI_RESERVED set.  The proposed API
doesn't allow any room for extension because it's completely unspecified
what the reserved region is intended for.  Here we use them for MSI, but
what else are we allowed to use them for?  How would the user specify one
reserved type vs another in a future extension?
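[For reference, the alternative Alex suggests could be as small as one new
flag on the existing map ioctl. A hedged uapi sketch only:
VFIO_DMA_MAP_FLAG_MSI_RESERVED is hypothetical, while the struct layout and
the READ/WRITE flags are the existing vfio_iommu_type1_dma_map definitions:

struct vfio_iommu_type1_dma_map {
	__u32	argsz;
	__u32	flags;
#define VFIO_DMA_MAP_FLAG_READ		(1 << 0)	/* readable from device */
#define VFIO_DMA_MAP_FLAG_WRITE		(1 << 1)	/* writable from device */
#define VFIO_DMA_MAP_FLAG_MSI_RESERVED	(1 << 2)	/* hypothetical: MSI reserved iova */
	__u64	vaddr;	/* would be ignored when MSI_RESERVED is set */
	__u64	iova;
	__u64	size;
};

The flag itself then answers the "which reserved type" question: a future
reserved use would get its own flag rather than an unspecified region.]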
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 57d8c37..fa5d3e4 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -59,6 +59,7 @@ struct vfio_iommu {
 	struct rb_root		dma_list;
 	bool			v2;
 	bool			nesting;
+	struct list_head	reserved_iova_list;
 };
 
 struct vfio_domain {
@@ -77,6 +78,15 @@ struct vfio_dma {
 	int			prot;		/* IOMMU_READ/WRITE */
 };
 
+struct vfio_resvd_region {
+	dma_addr_t	iova;
+	size_t		size;
+	int		prot;		/* IOMMU_READ/WRITE */
+	int		refcount;	/* ref count of mappings */
+	uint64_t	map_paddr;	/* Mapped Physical Address */
+	struct list_head next;
+};
+
 struct vfio_group {
 	struct iommu_group	*iommu_group;
 	struct list_head	next;
@@ -106,6 +116,38 @@ static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu,
 	return NULL;
 }
 
+/* This function must be called with iommu->lock held */
+static bool vfio_overlap_with_resvd_region(struct vfio_iommu *iommu,
+					   dma_addr_t start, size_t size)
+{
+	struct vfio_resvd_region *region;
+
+	list_for_each_entry(region, &iommu->reserved_iova_list, next) {
+		if (region->iova < start)
+			return (start - region->iova < region->size);
+		else if (start < region->iova)
+			return (region->iova - start < size);
+
+		return (region->size > 0 && size > 0);
+	}
+
+	return false;
+}
+
+/* This function must be called with iommu->lock held */
+static
+struct vfio_resvd_region *vfio_find_resvd_region(struct vfio_iommu *iommu,
+						 dma_addr_t start, size_t size)
+{
+	struct vfio_resvd_region *region;
+
+	list_for_each_entry(region, &iommu->reserved_iova_list, next)
+		if (region->iova == start && region->size == size)
+			return region;
+
+	return NULL;
+}
+
 static void vfio_link_dma(struct vfio_iommu *iommu, struct vfio_dma *new)
 {
 	struct rb_node **link = &iommu->dma_list.rb_node, *parent = NULL;
@@ -580,7 +622,8 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
 
 	mutex_lock(&iommu->lock);
 
-	if (vfio_find_dma(iommu, iova, size)) {
+	if (vfio_find_dma(iommu, iova, size) ||
+	    vfio_overlap_with_resvd_region(iommu, iova, size)) {
 		mutex_unlock(&iommu->lock);
 		return -EEXIST;
 	}
@@ -626,6 +669,127 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
 	return ret;
 }
 
+/* This function must be called with iommu->lock held */
+static
+int vfio_iommu_resvd_region_del(struct vfio_iommu *iommu,
+				dma_addr_t iova, size_t size, int prot)
+{
+	struct vfio_resvd_region *res_region;
+
+	res_region = vfio_find_resvd_region(iommu, iova, size);
+	/* Region should not be mapped in iommu */
+	if (res_region == NULL || res_region->map_paddr)
+		return -EINVAL;
+
+	list_del(&res_region->next);
+	kfree(res_region);
+	return 0;
+}
+
+/* This function must be called with iommu->lock held */
+static int vfio_iommu_resvd_region_add(struct vfio_iommu *iommu,
+				       dma_addr_t iova, size_t size, int prot)
+{
+	struct vfio_resvd_region *res_region;
+
+	/* Check for overlap with dma mappings and reserved regions */
+	if (vfio_find_dma(iommu, iova, size) ||
+	    vfio_find_resvd_region(iommu, iova, size))
+		return -EEXIST;
+
+	res_region = kzalloc(sizeof(*res_region), GFP_KERNEL);
+	if (res_region == NULL)
+		return -ENOMEM;
+
+	res_region->iova = iova;
+	res_region->size = size;
+	res_region->prot = prot;
+	res_region->refcount = 0;
+	res_region->map_paddr = 0;
+
+	list_add(&res_region->next, &iommu->reserved_iova_list);
+
+	return 0;
+}
+
+static
+int vfio_handle_reserved_region_add(struct vfio_iommu *iommu,
+			struct vfio_iommu_reserved_region_add *region)
+{
+	dma_addr_t iova = region->iova;
+	size_t size = region->size;
+	int flags = region->flags;
+	uint64_t mask;
+	int prot = 0;
+	int ret;
+
+	/* Verify that none of our __u64 fields overflow */
+	if (region->size != size || region->iova != iova)
+		return -EINVAL;
+
+	mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1;
+
+	WARN_ON(mask & PAGE_MASK);
+
+	if (flags & VFIO_IOMMU_RES_REGION_READ)
+		prot |= IOMMU_READ;
+	if (flags & VFIO_IOMMU_RES_REGION_WRITE)
+		prot |= IOMMU_WRITE;
+
+	if (!prot || !size || (size | iova) & mask)
+		return -EINVAL;
+
+	/* Don't allow IOVA wrap */
+	if (iova + size - 1 < iova)
+		return -EINVAL;
+
+	mutex_lock(&iommu->lock);
+
+	if (region->flags & VFIO_IOMMU_RES_REGION_ADD) {
+		ret = vfio_iommu_resvd_region_add(iommu, iova, size, prot);
+		if (ret) {
+			mutex_unlock(&iommu->lock);
+			return ret;
+		}
+	}
+
+	mutex_unlock(&iommu->lock);
+	return 0;
+}
+
+static
+int vfio_handle_reserved_region_del(struct vfio_iommu *iommu,
+			struct vfio_iommu_reserved_region_del *region)
+{
+	dma_addr_t iova = region->iova;
+	size_t size = region->size;
+	int flags = region->flags;
+	int ret;
+
+	if (!(flags & VFIO_IOMMU_RES_REGION_DEL))
+		return -EINVAL;
+
+	mutex_lock(&iommu->lock);
+
+	/* Check for the region */
+	if (vfio_find_resvd_region(iommu, iova, size) == NULL) {
+		mutex_unlock(&iommu->lock);
+		return -EINVAL;
+	}
+
+	/* remove the reserved region */
+	if (region->flags & VFIO_IOMMU_RES_REGION_DEL) {
+		ret = vfio_iommu_resvd_region_del(iommu, iova, size, flags);
+		if (ret) {
+			mutex_unlock(&iommu->lock);
+			return ret;
+		}
+	}
+
+	mutex_unlock(&iommu->lock);
+	return 0;
+}
+
 static int vfio_bus_type(struct device *dev, void *data)
 {
 	struct bus_type **bus = data;
@@ -905,6 +1069,7 @@ static void *vfio_iommu_type1_open(unsigned long arg)
 	}
 
 	INIT_LIST_HEAD(&iommu->domain_list);
+	INIT_LIST_HEAD(&iommu->reserved_iova_list);
 	iommu->dma_list = RB_ROOT;
 	mutex_init(&iommu->lock);
 
@@ -1020,6 +1185,40 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
 			return ret;
 
 		return copy_to_user((void __user *)arg, &unmap, minsz);
+	} else if (cmd == VFIO_IOMMU_RESERVED_REGION_ADD) {
+		struct vfio_iommu_reserved_region_add region;
+		long ret;
+
+		minsz = offsetofend(struct vfio_iommu_reserved_region_add,
+				    size);
+		if (copy_from_user(&region, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (region.argsz < minsz)
+			return -EINVAL;
+
+		ret = vfio_handle_reserved_region_add(iommu, &region);
+		if (ret)
+			return ret;
+
+		return copy_to_user((void __user *)arg, &region, minsz);
+	} else if (cmd == VFIO_IOMMU_RESERVED_REGION_DEL) {
+		struct vfio_iommu_reserved_region_del region;
+		long ret;
+
+		minsz = offsetofend(struct vfio_iommu_reserved_region_del,
+				    size);
+		if (copy_from_user(&region, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (region.argsz < minsz)
+			return -EINVAL;
+
+		ret = vfio_handle_reserved_region_del(iommu, &region);
+		if (ret)
+			return ret;
+
+		return copy_to_user((void __user *)arg, &region, minsz);
 	}
 
 	return -ENOTTY;
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index b57b750..1abd1a9 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -440,6 +440,49 @@ struct vfio_iommu_type1_dma_unmap {
 #define VFIO_IOMMU_ENABLE	_IO(VFIO_TYPE, VFIO_BASE + 15)
 #define VFIO_IOMMU_DISABLE	_IO(VFIO_TYPE, VFIO_BASE + 16)
 
+/**************** Reserved IOVA region specific APIs **********************/
+
+/*
+ * VFIO_IOMMU_RESERVED_REGION_ADD - _IO(VFIO_TYPE, VFIO_BASE + 17,
+ *				    struct vfio_iommu_reserved_region_add)
+ * This is used to add a reserved iova region.
+ * @flags - Input: VFIO_IOMMU_RES_REGION_ADD flag is for adding
+ *          a reserved region.
+ * Also pass READ/WRITE/IOMMU flags to be used in iommu mapping.
+ * @iova - Input: IOVA base address of reserved region
+ * @size - Input: Size of the reserved region
+ * Return: 0 on success, -errno on failure
+ */
+struct vfio_iommu_reserved_region_add {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_IOMMU_RES_REGION_ADD	(1 << 0) /* Add a reserved region */
+#define VFIO_IOMMU_RES_REGION_READ	(1 << 1) /* readable region */
+#define VFIO_IOMMU_RES_REGION_WRITE	(1 << 2) /* writable region */
+	__u64	iova;
+	__u64	size;
+};
+#define VFIO_IOMMU_RESERVED_REGION_ADD	_IO(VFIO_TYPE, VFIO_BASE + 17)
+
+/*
+ * VFIO_IOMMU_RESERVED_REGION_DEL - _IO(VFIO_TYPE, VFIO_BASE + 18,
+ *				    struct vfio_iommu_reserved_region_del)
+ * This is used to delete an existing reserved iova region.
+ * @flags - VFIO_IOMMU_RES_REGION_DEL flag is for deleting a region,
+ *          only an unmapped region can be deleted.
+ * @iova - Input: IOVA base address of reserved region
+ * @size - Input: Size of the reserved region
+ * Return: 0 on success, -errno on failure
+ */
+struct vfio_iommu_reserved_region_del {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_IOMMU_RES_REGION_DEL	(1 << 0) /* unset the reserved region */
+	__u64	iova;
+	__u64	size;
+};
+#define VFIO_IOMMU_RESERVED_REGION_DEL	_IO(VFIO_TYPE, VFIO_BASE + 18)
+
 /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
 
 /*
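[As proposed in this RFC, user space would drive the new ioctl roughly as
in the hedged sketch below. It is written against the uapi above; container
setup and error handling are omitted. Note that this revision still
requires the VFIO_IOMMU_RES_REGION_ADD flag, even though the review above
points out it is redundant:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Reserve an MSI iova window on an already-initialised type1 container fd */
static int reserve_msi_iova(int container, uint64_t iova, uint64_t size)
{
	struct vfio_iommu_reserved_region_add region = {
		.argsz = sizeof(region),
		.flags = VFIO_IOMMU_RES_REGION_ADD |
			 VFIO_IOMMU_RES_REGION_READ |
			 VFIO_IOMMU_RES_REGION_WRITE,
		.iova  = iova,
		.size  = size,
	};

	return ioctl(container, VFIO_IOMMU_RESERVED_REGION_ADD, &region);
}
]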
This Patch adds the VFIO APIs to add and remove reserved iova regions.
The reserved iova region can be used for mapping some specific physical
address in iommu.

Currently we are planning to use this interface for adding iova regions
for creating iommu of msi-pages. But the API are designed for future
extension where some other physical address can be mapped.

Signed-off-by: Bharat Bhushan <Bharat.Bhushan@freescale.com>
---
 drivers/vfio/vfio_iommu_type1.c | 201 +++++++++++++++++++++++++++++++++++++++-
 include/uapi/linux/vfio.h       |  43 +++++++++
 2 files changed, 243 insertions(+), 1 deletion(-)