Message ID | 20201008171558.410886-10-jean-philippe@linaro.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | virtio-iommu: VFIO integration | expand |
Hi Jean, On 10/8/20 7:15 PM, Jean-Philippe Brucker wrote: > From: Bharat Bhushan <bbhushan2@marvell.com> > > The virtio-iommu device can deal with arbitrary page sizes for virtual > endpoints, but for endpoints assigned with VFIO it must follow the page > granule used by the host IOMMU driver. > > Implement the interface to set the vIOMMU page size mask, called by VFIO > for each endpoint. We assume that all host IOMMU drivers use the same > page granule (the host page granule). Override the page_size_mask field > in the virtio config space. > > Signed-off-by: Bharat Bhushan <bbhushan2@marvell.com> > Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org> > --- > v10: Use global page mask, allowing VFIO to override it until boot. > --- > hw/virtio/virtio-iommu.c | 51 ++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 51 insertions(+) > > diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c > index 8823bfc804a..dd0b3093d1b 100644 > --- a/hw/virtio/virtio-iommu.c > +++ b/hw/virtio/virtio-iommu.c > @@ -914,6 +914,56 @@ static int virtio_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu_mr, > return 0; > } > > +static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, > + uint64_t page_size_mask, > + Error **errp) > +{ > + int new_granule, old_granule; > + IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr); > + VirtIOIOMMU *s = sdev->viommu; > + > + if (!page_size_mask) { set errp > + return -1; > + } > + > + new_granule = ctz64(page_size_mask); > + old_granule = ctz64(s->config.page_size_mask); I think this would be interesting to add a trace point > + > + /* > + * Modifying the page size after machine initialization isn't supported. > + * Having a different mask is possible but the guest will use sub-optimal > + * block sizes, so warn about it. 
> + */ > + if (qdev_hotplug) { > + if (new_granule != old_granule) { > + error_setg(errp, > + "virtio-iommu page mask 0x%"PRIx64 > + " is incompatible with mask 0x%"PRIx64, > + s->config.page_size_mask, page_size_mask); > + return -1; > + } else if (page_size_mask != s->config.page_size_mask) { > + warn_report("virtio-iommu page mask 0x%"PRIx64 > + " does not match 0x%"PRIx64, > + s->config.page_size_mask, page_size_mask); > + } > + return 0; > + } > + > + /* > + * Disallow shrinking the page size. For example if an endpoint only > + * supports 64kB pages, we can't globally enable 4kB pages. But that > + * shouldn't happen, the host is unlikely to setup differing page granules. > + * The other bits are only hints describing optimal block sizes. > + */ > + if (new_granule < old_granule) { > + error_setg(errp, "memory region shrinks the virtio-iommu page granule"); > + return -1; > + } > + > + s->config.page_size_mask = page_size_mask; > + return 0; > +} > + > static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) > { > VirtIODevice *vdev = VIRTIO_DEVICE(dev); > @@ -1146,6 +1196,7 @@ static void virtio_iommu_memory_region_class_init(ObjectClass *klass, > imrc->translate = virtio_iommu_translate; > imrc->replay = virtio_iommu_replay; > imrc->notify_flag_changed = virtio_iommu_notify_flag_changed; > + imrc->iommu_set_page_size_mask = virtio_iommu_set_page_size_mask; > } > > static const TypeInfo virtio_iommu_info = { > Thanks Eric
On Thu, Oct 08, 2020 at 07:15:57PM +0200, Jean-Philippe Brucker wrote: > From: Bharat Bhushan <bbhushan2@marvell.com> > > The virtio-iommu device can deal with arbitrary page sizes for virtual > endpoints, but for endpoints assigned with VFIO it must follow the page > granule used by the host IOMMU driver. > > Implement the interface to set the vIOMMU page size mask, called by VFIO > for each endpoint. We assume that all host IOMMU drivers use the same > page granule (the host page granule). Override the page_size_mask field > in the virtio config space. > > Signed-off-by: Bharat Bhushan <bbhushan2@marvell.com> > Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org> > --- > v10: Use global page mask, allowing VFIO to override it until boot. > --- > hw/virtio/virtio-iommu.c | 51 ++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 51 insertions(+) > > diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c > index 8823bfc804a..dd0b3093d1b 100644 > --- a/hw/virtio/virtio-iommu.c > +++ b/hw/virtio/virtio-iommu.c > @@ -914,6 +914,56 @@ static int virtio_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu_mr, > return 0; > } > > +static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, > + uint64_t page_size_mask, > + Error **errp) > +{ > + int new_granule, old_granule; > + IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr); > + VirtIOIOMMU *s = sdev->viommu; > + > + if (!page_size_mask) { > + return -1; > + } > + > + new_granule = ctz64(page_size_mask); > + old_granule = ctz64(s->config.page_size_mask); > + > + /* > + * Modifying the page size after machine initialization isn't supported. > + * Having a different mask is possible but the guest will use sub-optimal > + * block sizes, so warn about it. 
> + */ > + if (qdev_hotplug) { > + if (new_granule != old_granule) { > + error_setg(errp, > + "virtio-iommu page mask 0x%"PRIx64 > + " is incompatible with mask 0x%"PRIx64, > + s->config.page_size_mask, page_size_mask); > + return -1; > + } else if (page_size_mask != s->config.page_size_mask) { > + warn_report("virtio-iommu page mask 0x%"PRIx64 > + " does not match 0x%"PRIx64, > + s->config.page_size_mask, page_size_mask); > + } > + return 0; > + } > + > + /* > + * Disallow shrinking the page size. For example if an endpoint only > + * supports 64kB pages, we can't globally enable 4kB pages. But that > + * shouldn't happen, the host is unlikely to setup differing page granules. > + * The other bits are only hints describing optimal block sizes. > + */ > + if (new_granule < old_granule) { > + error_setg(errp, "memory region shrinks the virtio-iommu page granule"); > + return -1; > + } My understanding is that shrink is actually allowed, instead we should forbid growing of the mask? For example, initially the old_granule will always points to the guest page size. Then as long as the host page size (which new_granule represents) is smaller than the old_granule, then it seems fine... Or am I wrong? Another thing, IIUC this function will be majorly called in vfio code when the container page mask will be passed into it. If there're multiple vfio containers that support different host IOMMU page sizes, then IIUC the order of the call to virtio_iommu_set_page_size_mask() is undefined. It's probably related to which "-device vfio-pci,..." parameter is earlier. To make this simpler, I'm thinking whether we should just forbid the case where devices have different iommu page sizes. So when assigned devices are used, we make sure all host iommu page sizes are the same, and the value should be smaller than guest page size. Otherwise we'll simply fall back to guest psize. 
Thanks, > + > + s->config.page_size_mask = page_size_mask; > + return 0; > +} > + > static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) > { > VirtIODevice *vdev = VIRTIO_DEVICE(dev); > @@ -1146,6 +1196,7 @@ static void virtio_iommu_memory_region_class_init(ObjectClass *klass, > imrc->translate = virtio_iommu_translate; > imrc->replay = virtio_iommu_replay; > imrc->notify_flag_changed = virtio_iommu_notify_flag_changed; > + imrc->iommu_set_page_size_mask = virtio_iommu_set_page_size_mask; > } > > static const TypeInfo virtio_iommu_info = { > -- > 2.28.0 >
On Mon, Oct 19, 2020 at 05:35:39PM -0400, Peter Xu wrote: > > + /* > > + * Disallow shrinking the page size. For example if an endpoint only > > + * supports 64kB pages, we can't globally enable 4kB pages. But that > > + * shouldn't happen, the host is unlikely to setup differing page granules. > > + * The other bits are only hints describing optimal block sizes. > > + */ > > + if (new_granule < old_granule) { > > + error_setg(errp, "memory region shrinks the virtio-iommu page granule"); > > + return -1; > > + } > > My understanding is that shrink is actually allowed, instead we should forbid > growing of the mask? For example, initially the old_granule will always points > to the guest page size. Then as long as the host page size (which new_granule > represents) is smaller than the old_granule, then it seems fine... Or am I wrong? The case I was checking against is two assigned devices with different page sizes. First one sets a 64kB page size, then the second one shouldn't be able to shrink it back to 4kB, because the guest would create mappings not aligned on 64kB, which can't be applied by the pIOMMU of the first device. But let's forget this case for now, in practice all assigned devices use the host page size. > > Another thing, IIUC this function will be majorly called in vfio code when the > container page mask will be passed into it. If there're multiple vfio > containers that support different host IOMMU page sizes, then IIUC the order of > the call to virtio_iommu_set_page_size_mask() is undefined. It's probably > related to which "-device vfio-pci,..." parameter is earlier. > > To make this simpler, I'm thinking whether we should just forbid the case where > devices have different iommu page sizes. So when assigned devices are used, we > make sure all host iommu page sizes are the same, and the value should be > smaller than guest page size. Otherwise we'll simply fall back to guest psize. Mostly agree, I need to simplify this function. 
I don't think we care about guest page size, though. Currently our default mask is TARGET_PAGE_MASK, which is the smallest size supported by vCPUs (generally 4kB), but it doesn't really mean guest page size, since the guest can choose a larger granule at runtime. Besides virtio-iommu can in theory map at byte granule if there isn't any assigned device, so our default mask could as well be ~0ULL (but doesn't work at the moment, I've tried). So what I'd like to do for next version: * Set qemu_real_host_page_mask as the default page mask, instead of the rather arbitrary TARGET_PAGE_MASK. Otherwise we cannot hotplug assigned devices on a 64kB host, since TARGET_PAGE_MASK is pretty much always 4kB. * Disallow changing the page size. It's simpler and works in practice if we default to qemu_real_host_page_mask. * For non-hotplug devices, allow changing the rest of the mask. For hotplug devices, only warn about it. Thanks, Jean
On Fri, Oct 16, 2020 at 03:08:03PM +0200, Auger Eric wrote: > > +static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, > > + uint64_t page_size_mask, > > + Error **errp) > > +{ > > + int new_granule, old_granule; > > + IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr); > > + VirtIOIOMMU *s = sdev->viommu; > > + > > + if (!page_size_mask) { > set errp Woops, fixed > > + return -1; > > + } > > + > > + new_granule = ctz64(page_size_mask); > > + old_granule = ctz64(s->config.page_size_mask); > > I think this would be interesting to add a trace point Agreed Thanks, Jean
On Thu, Oct 22, 2020 at 06:39:37PM +0200, Jean-Philippe Brucker wrote: > So what I'd like to do for next version: > > * Set qemu_real_host_page_mask as the default page mask, instead of the > rather arbitrary TARGET_PAGE_MASK. Oh, I thought TARGET_PAGE_MASK was intended - kernel commit 39b3b3c9cac1 ("iommu/virtio: Reject IOMMU page granule larger than PAGE_SIZE", 2020-03-27) explicitly introduced a check that makes the virtio-iommu kernel driver fail directly if this psize is bigger than PAGE_SIZE in the guest. So it sounds reasonable to have the default value as PAGE_SIZE (if it's the same as TARGET_PAGE_SIZE in QEMU, which seems true?). For example, I'm thinking whether qemu_real_host_page_mask could be bigger than PAGE_SIZE in the guest in some environments, then it seems virtio-iommu won't boot anymore without assigned devices, because that extra check above will always fail. > Otherwise we cannot hotplug assigned > devices on a 64kB host, since TARGET_PAGE_MASK is pretty much always > 4kB. > > * Disallow changing the page size. It's simpler and works in > practice if we default to qemu_real_host_page_mask. > > * For non-hotplug devices, allow changing the rest of the mask. For > hotplug devices, only warn about it. Could I ask what's "the rest of the mask"? On the driver side, I see that viommu_domain_finalise() will pick the largest supported page size to use, if so then we seem to be quite restricted on what page size we can use. I'm also a bit curious about what scenario we plan to support in this initial version, especially for ARM. For x86, I think it's probably always 4k everywhere so it's fairly simple. I know little about the ARM side... Thanks,
On Thu, Oct 22, 2020 at 04:56:16PM -0400, Peter Xu wrote: > On Thu, Oct 22, 2020 at 06:39:37PM +0200, Jean-Philippe Brucker wrote: > > So what I'd like to do for next version: > > > > * Set qemu_real_host_page_mask as the default page mask, instead of the > > rather arbitrary TARGET_PAGE_MASK. > > Oh, I thought TARGET_PAGE_MASK was intended - kernel committ 39b3b3c9cac1 > ("iommu/virtio: Reject IOMMU page granule larger than PAGE_SIZE", 2020-03-27) > explicitly introduced a check that virtio-iommu kernel driver will fail > directly if this psize is bigger than PAGE_SIZE in the guest. So it sounds > reasonable to have the default value as PAGE_SIZE (if it's the same as > TARGET_PAGE_SIZE in QEMU, which seems true?). > > For example, I'm thinking whether qemu_real_host_page_mask could be bigger than > PAGE_SIZE in the guest in some environments, then it seems virtio-iommu won't > boot anymore without assigned devices, because that extra check above will > always fail. Right, I missed this problem again. Switching to qemu_real_host_page_mask is probably not the best idea until we solve the host64k-guest4k problem. > > > Otherwise we cannot hotplug assigned > > devices on a 64kB host, since TARGET_PAGE_MASK is pretty much always > > 4kB. > > > > * Disallow changing the page size. It's simpler and works in > > practice if we default to qemu_real_host_page_mask. > > > > * For non-hotplug devices, allow changing the rest of the mask. For > > hotplug devices, only warn about it. > > Could I ask what's "the rest of the mask"? The LSB in the mask defines the page size. The other bits define which block sizes are supported, for example 2MB and 1GB blocks with a 4k page size. These are only for optimization, the upper bits of the mask could also be all 1s. If the guest aligns its mappings on those block sizes, then the host can use intermediate levels in the page tables resulting in fewer IOTLB entries. 
> On the driver side, I see that > viommu_domain_finalise() will pick the largest supported page size to use, if > so then we seem to be quite restricted on what page size we can use. In Linux iommu_dma_alloc_remap() tries to allocate blocks based on the page mask (copied by viommu_domain_finalise() into domain->pgsize_bitmap) > I'm also a bit curious about what scenario we plan to support in this initial > version, especially for ARM. For x86, I think it's probably always 4k > everywhere so it's fairly simple. Know little on ARM side... Arm CPUs and SMMU support 4k, 16k and 64k page sizes. I don't think 16k is used anywhere but some distributions chose 64k (RHEL, I think?), others 4k, so we need to support both. Unfortunately as noted above host64k-guest4k is not possible without adding a negotiation mechanism to virtio-iommu, host VFIO and IOMMU driver. Thanks, Jean
On Fri, Oct 23, 2020 at 09:48:58AM +0200, Jean-Philippe Brucker wrote: > Arm CPUs and SMMU support 4k, 16k and 64k page sizes. I don't think 16k is > used anywhere but some distributions chose 64k (RHEL, I think?), others > 4k, so we need to support both. > > Unfortunately as noted above host64k-guest4k is not possible without > adding a negotiation mechanism to virtio-iommu, host VFIO and IOMMU > driver. I see. Then it seems we would still need to support host4k-guest64k. Maybe for the assigned case, we can simply AND all the psize_masks of all the vfio containers that are supported, and use the result to replace the default psize mask (TARGET_PAGE_SIZE) without caring about whether it's shrinking or not? Note that the current patch only updates config.psize_mask to the new one, but I think we need to calculate the subset of all containers rather than a simple update. Then with the help of 39b3b3c9cac1 imho we'll gracefully fail the probe if the psize is not suitable anyway, e.g., host64k-guest4k. Thanks,
On Fri, Oct 23, 2020 at 12:47:02PM -0400, Peter Xu wrote: > On Fri, Oct 23, 2020 at 09:48:58AM +0200, Jean-Philippe Brucker wrote: > > Arm CPUs and SMMU support 4k, 16k and 64k page sizes. I don't think 16k is > > used anywhere but some distributions chose 64k (RHEL, I think?), others > > 4k, so we need to support both. > > > > Unfortunately as noted above host64k-guest4k is not possible without > > adding a negotiation mechanism to virtio-iommu, host VFIO and IOMMU > > driver. > > I see. Then it seems we would still need to support host4k-guest64k. > > Maybe for assigned case, we can simply AND all the psize_masks of all the vfio > containers that supported to replace the default psize mask (TARGET_PAGE_SIZE) > without caring about whether it's shrinking or not? Note that current patch > only update config.psize_mask to the new one, but I think we need to calculate > the subset of all containers rather than a simply update. Yes I think an AND is the right operation. We'll return an error if the resulting mask is 0. Then for hotplug, I think I'll keep the current "best effort" code from this patch. If necessary we could later add a parameter to set a default mask and guarantee hotplug success. Thanks, Jean > Then with the help > of 39b3b3c9cac1 imho we'll gracefully fail the probe if the psize is not > suitable anyway, e.g., host64k-guest4k. > > Thanks, > > -- > Peter Xu >
On Tue, Oct 27, 2020 at 06:38:40PM +0100, Jean-Philippe Brucker wrote: > On Fri, Oct 23, 2020 at 12:47:02PM -0400, Peter Xu wrote: > > On Fri, Oct 23, 2020 at 09:48:58AM +0200, Jean-Philippe Brucker wrote: > > > Arm CPUs and SMMU support 4k, 16k and 64k page sizes. I don't think 16k is > > > used anywhere but some distributions chose 64k (RHEL, I think?), others > > > 4k, so we need to support both. > > > > > > Unfortunately as noted above host64k-guest4k is not possible without > > > adding a negotiation mechanism to virtio-iommu, host VFIO and IOMMU > > > driver. > > > > I see. Then it seems we would still need to support host4k-guest64k. > > > > Maybe for assigned case, we can simply AND all the psize_masks of all the vfio > > containers that supported to replace the default psize mask (TARGET_PAGE_SIZE) > > without caring about whether it's shrinking or not? Note that current patch > > only update config.psize_mask to the new one, but I think we need to calculate > > the subset of all containers rather than a simply update. > > Yes I think an AND is the right operation. We'll return an error if the > resulting mask is 0. Then for hotplug, I think I'll keep the current "best > effort" code from this patch. If necessary we could later add a parameter > to set a default mask and guarantee hotplug success. > > Thanks, > Jean So I should expect a new version with that? > > Then with the help > > of 39b3b3c9cac1 imho we'll gracefully fail the probe if the psize is not > > suitable anyway, e.g., host64k-guest4k. > > > > Thanks, > > > > -- > > Peter Xu > >
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index 8823bfc804a..dd0b3093d1b 100644 --- a/hw/virtio/virtio-iommu.c +++ b/hw/virtio/virtio-iommu.c @@ -914,6 +914,56 @@ static int virtio_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu_mr, return 0; } +static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, + uint64_t page_size_mask, + Error **errp) +{ + int new_granule, old_granule; + IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr); + VirtIOIOMMU *s = sdev->viommu; + + if (!page_size_mask) { + return -1; + } + + new_granule = ctz64(page_size_mask); + old_granule = ctz64(s->config.page_size_mask); + + /* + * Modifying the page size after machine initialization isn't supported. + * Having a different mask is possible but the guest will use sub-optimal + * block sizes, so warn about it. + */ + if (qdev_hotplug) { + if (new_granule != old_granule) { + error_setg(errp, + "virtio-iommu page mask 0x%"PRIx64 + " is incompatible with mask 0x%"PRIx64, + s->config.page_size_mask, page_size_mask); + return -1; + } else if (page_size_mask != s->config.page_size_mask) { + warn_report("virtio-iommu page mask 0x%"PRIx64 + " does not match 0x%"PRIx64, + s->config.page_size_mask, page_size_mask); + } + return 0; + } + + /* + * Disallow shrinking the page size. For example if an endpoint only + * supports 64kB pages, we can't globally enable 4kB pages. But that + * shouldn't happen, the host is unlikely to setup differing page granules. + * The other bits are only hints describing optimal block sizes. 
+ */ + if (new_granule < old_granule) { + error_setg(errp, "memory region shrinks the virtio-iommu page granule"); + return -1; + } + + s->config.page_size_mask = page_size_mask; + return 0; +} + static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); @@ -1146,6 +1196,7 @@ static void virtio_iommu_memory_region_class_init(ObjectClass *klass, imrc->translate = virtio_iommu_translate; imrc->replay = virtio_iommu_replay; imrc->notify_flag_changed = virtio_iommu_notify_flag_changed; + imrc->iommu_set_page_size_mask = virtio_iommu_set_page_size_mask; } static const TypeInfo virtio_iommu_info = {