Message ID | 1580300216-86172-12-git-send-email-yi.l.liu@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | intel_iommu: expose Shared Virtual Addressing to VMs | expand |
On Wed, Jan 29, 2020 at 04:16:42AM -0800, Liu, Yi L wrote: > From: Liu Yi L <yi.l.liu@intel.com> > > VFIO checks IOMMU UAPI version when it finds Kernel supports > VFIO_TYPE1_NESTING_IOMMU. It is enough for UAPI compatibility > check. However, IOMMU UAPI may support multiple stage-1 pasid > formats in a specific UAPI version, which is highly possible > since IOMMU UAPI supports stage-1 formats across all IOMMU vendors. > So VFIO needs to get the supported formats from Kernel and tell > vIOMMU. Let vIOMMU select proper format when setup dual stage DMA > translation. > > This patch gets the stage-1 pasid format from kernel by using IOCTL > VFIO_IOMMU_GET_INFO and pass the supported format to vIOMMU by the > DualStageIOMMUObject instance which has been registered to vIOMMU. > > This patch referred some code from Shameer Kolothum. > https://lists.gnu.org/archive/html/qemu-devel/2018-05/msg03759.html > > Cc: Kevin Tian <kevin.tian@intel.com> > Cc: Jacob Pan <jacob.jun.pan@linux.intel.com> > Cc: Peter Xu <peterx@redhat.com> > Cc: Eric Auger <eric.auger@redhat.com> > Cc: Yi Sun <yi.y.sun@linux.intel.com> > Cc: David Gibson <david@gibson.dropbear.id.au> > Cc: Alex Williamson <alex.williamson@redhat.com> > Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com> > Signed-off-by: Liu Yi L <yi.l.liu@intel.com> > --- > hw/iommu/dual_stage_iommu.c | 5 ++- > hw/vfio/common.c | 85 ++++++++++++++++++++++++++++++++++++- > include/hw/iommu/dual_stage_iommu.h | 10 ++++- > 3 files changed, 97 insertions(+), 3 deletions(-) > > diff --git a/hw/iommu/dual_stage_iommu.c b/hw/iommu/dual_stage_iommu.c > index be4179d..d5a7168 100644 > --- a/hw/iommu/dual_stage_iommu.c > +++ b/hw/iommu/dual_stage_iommu.c > @@ -48,9 +48,12 @@ int ds_iommu_pasid_free(DualStageIOMMUObject *dsi_obj, uint32_t pasid) > } > > void ds_iommu_object_init(DualStageIOMMUObject *dsi_obj, > - DualStageIOMMUOps *ops) > + DualStageIOMMUOps *ops, > + DualStageIOMMUInfo *uinfo) > { > dsi_obj->ops = ops; > + > + dsi_obj->uinfo.pasid_format = uinfo->pasid_format; > } > > void ds_iommu_object_destroy(DualStageIOMMUObject *dsi_obj) > diff --git a/hw/vfio/common.c b/hw/vfio/common.c > index fc1723d..a07824b 100644 > --- a/hw/vfio/common.c > +++ b/hw/vfio/common.c > @@ -1182,10 +1182,84 @@ static int vfio_get_iommu_type(VFIOContainer *container, > static struct DualStageIOMMUOps vfio_ds_iommu_ops = { > }; > > +static int vfio_get_iommu_info(VFIOContainer *container, > + struct vfio_iommu_type1_info **info) Better comment on the function to remember to free(*info) after use for the callers. > +{ > + > + size_t argsz = sizeof(struct vfio_iommu_type1_info); > + Nit: extra newline. > + > + *info = g_malloc0(argsz); > + > +retry: > + (*info)->argsz = argsz; > + > + if (ioctl(container->fd, VFIO_IOMMU_GET_INFO, *info)) { > + g_free(*info); > + *info = NULL; > + return -errno; > + } > + > + if (((*info)->argsz > argsz)) { > + argsz = (*info)->argsz; > + *info = g_realloc(*info, argsz); > + goto retry; > + } > + > + return 0; > +} > + > +static struct vfio_info_cap_header * > +vfio_get_iommu_info_cap(struct vfio_iommu_type1_info *info, uint16_t id) > +{ > + struct vfio_info_cap_header *hdr; > + void *ptr = info; > + > + if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) { > + return NULL; > + } > + > + for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) { > + if (hdr->id == id) { > + return hdr; > + } > + } > + > + return NULL; > +} > + > +static int vfio_get_nesting_iommu_format(VFIOContainer *container, > + uint32_t *pasid_format) > +{ > + struct vfio_iommu_type1_info *info; > + struct vfio_info_cap_header *hdr; > + struct vfio_iommu_type1_info_cap_nesting *cap; > + > + if (vfio_get_iommu_info(container, &info)) { > + return -errno; Should return the retcode from vfio_get_iommu_info. > + } > + > + hdr = vfio_get_iommu_info_cap(info, > + VFIO_IOMMU_TYPE1_INFO_CAP_NESTING); > + if (!hdr) { > + g_free(info); > + return -errno; > + } > + > + cap = container_of(hdr, > + struct vfio_iommu_type1_info_cap_nesting, header); > + *pasid_format = cap->pasid_format; > + > + g_free(info); > + return 0; > +} > + > static int vfio_init_container(VFIOContainer *container, int group_fd, > Error **errp) > { > int iommu_type, ret; > + uint32_t format; > + DualStageIOMMUInfo uinfo; > > iommu_type = vfio_get_iommu_type(container, errp); > if (iommu_type < 0) { > @@ -1214,7 +1288,16 @@ static int vfio_init_container(VFIOContainer *container, int group_fd, > } > > if (iommu_type == VFIO_TYPE1_NESTING_IOMMU) { > - ds_iommu_object_init(&container->dsi_obj, &vfio_ds_iommu_ops); > + if (vfio_get_nesting_iommu_format(container, &format)) { > + error_setg_errno(errp, errno, > + "Failed to get nesting iommu format"); > + return -errno; Same here, you might want to return the retcode from vfio_get_nesting_iommu_format()? > + } > + > + uinfo.pasid_format = format; > + ds_iommu_object_init(&container->dsi_obj, > + &vfio_ds_iommu_ops, &uinfo); > + > if (iommu_context_register_ds_iommu(container->iommu_ctx, > &container->dsi_obj)) { > /* > diff --git a/include/hw/iommu/dual_stage_iommu.h b/include/hw/iommu/dual_stage_iommu.h > index e9891e3..c6100b4 100644 > --- a/include/hw/iommu/dual_stage_iommu.h > +++ b/include/hw/iommu/dual_stage_iommu.h > @@ -23,12 +23,14 @@ > #define HW_DS_IOMMU_H > > #include "qemu/queue.h" > +#include <linux/iommu.h> > #ifndef CONFIG_USER_ONLY > #include "exec/hwaddr.h" > #endif > > typedef struct DualStageIOMMUObject DualStageIOMMUObject; > typedef struct DualStageIOMMUOps DualStageIOMMUOps; > +typedef struct DualStageIOMMUInfo DualStageIOMMUInfo; > > struct DualStageIOMMUOps { > /* Allocate pasid from DualStageIOMMU (a.k.a. host IOMMU) */ > @@ -41,11 +43,16 @@ struct DualStageIOMMUOps { > uint32_t pasid); > }; > > +struct DualStageIOMMUInfo { > + uint32_t pasid_format; > +}; > + > /* > * This is an abstraction of Dual-stage IOMMU. > */ > struct DualStageIOMMUObject { > DualStageIOMMUOps *ops; > + DualStageIOMMUInfo uinfo; > }; > > int ds_iommu_pasid_alloc(DualStageIOMMUObject *dsi_obj, uint32_t min, > @@ -53,7 +60,8 @@ int ds_iommu_pasid_alloc(DualStageIOMMUObject *dsi_obj, uint32_t min, > int ds_iommu_pasid_free(DualStageIOMMUObject *dsi_obj, uint32_t pasid); > > void ds_iommu_object_init(DualStageIOMMUObject *dsi_obj, > - DualStageIOMMUOps *ops); > + DualStageIOMMUOps *ops, > + DualStageIOMMUInfo *uinfo); > void ds_iommu_object_destroy(DualStageIOMMUObject *dsi_obj); > > #endif > -- > 2.7.4 >
> From: Peter Xu <peterx@redhat.com> > Sent: Wednesday, February 12, 2020 3:30 AM > To: Liu, Yi L <yi.l.liu@intel.com> > Subject: Re: [RFC v3 11/25] vfio: get stage-1 pasid formats from Kernel > > On Wed, Jan 29, 2020 at 04:16:42AM -0800, Liu, Yi L wrote: > > From: Liu Yi L <yi.l.liu@intel.com> > > > > VFIO checks IOMMU UAPI version when it finds Kernel supports > > VFIO_TYPE1_NESTING_IOMMU. It is enough for UAPI compatibility check. > > However, IOMMU UAPI may support multiple stage-1 pasid formats in a > > specific UAPI version, which is highly possible since IOMMU UAPI > > supports stage-1 formats across all IOMMU vendors. > > So VFIO needs to get the supported formats from Kernel and tell > > vIOMMU. Let vIOMMU select proper format when setup dual stage DMA > > translation. > > > > This patch gets the stage-1 pasid format from kernel by using IOCTL > > VFIO_IOMMU_GET_INFO and pass the supported format to vIOMMU by the > > DualStageIOMMUObject instance which has been registered to vIOMMU. > > > > This patch referred some code from Shameer Kolothum. > > https://lists.gnu.org/archive/html/qemu-devel/2018-05/msg03759.html > > > > Cc: Kevin Tian <kevin.tian@intel.com> > > Cc: Jacob Pan <jacob.jun.pan@linux.intel.com> > > Cc: Peter Xu <peterx@redhat.com> > > Cc: Eric Auger <eric.auger@redhat.com> > > Cc: Yi Sun <yi.y.sun@linux.intel.com> > > Cc: David Gibson <david@gibson.dropbear.id.au> > > Cc: Alex Williamson <alex.williamson@redhat.com> > > Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com> > > Signed-off-by: Liu Yi L <yi.l.liu@intel.com> > > --- > > hw/iommu/dual_stage_iommu.c | 5 ++- > > hw/vfio/common.c | 85 > ++++++++++++++++++++++++++++++++++++- > > include/hw/iommu/dual_stage_iommu.h | 10 ++++- > > 3 files changed, 97 insertions(+), 3 deletions(-) > > > > diff --git a/hw/iommu/dual_stage_iommu.c > b/hw/iommu/dual_stage_iommu.c > > index be4179d..d5a7168 100644 > > --- a/hw/iommu/dual_stage_iommu.c > > +++ b/hw/iommu/dual_stage_iommu.c > > @@ -48,9 +48,12 @@ int ds_iommu_pasid_free(DualStageIOMMUObject > > *dsi_obj, uint32_t pasid) } > > > > void ds_iommu_object_init(DualStageIOMMUObject *dsi_obj, > > - DualStageIOMMUOps *ops) > > + DualStageIOMMUOps *ops, > > + DualStageIOMMUInfo *uinfo) > > { > > dsi_obj->ops = ops; > > + > > + dsi_obj->uinfo.pasid_format = uinfo->pasid_format; > > } > > > > void ds_iommu_object_destroy(DualStageIOMMUObject *dsi_obj) diff > > --git a/hw/vfio/common.c b/hw/vfio/common.c index fc1723d..a07824b > > 100644 > > --- a/hw/vfio/common.c > > +++ b/hw/vfio/common.c > > @@ -1182,10 +1182,84 @@ static int vfio_get_iommu_type(VFIOContainer > > *container, static struct DualStageIOMMUOps vfio_ds_iommu_ops = { }; > > > > +static int vfio_get_iommu_info(VFIOContainer *container, > > + struct vfio_iommu_type1_info **info) > > Better comment on the function to remember to free(*info) after use for the > callers. Will do.
diff --git a/hw/iommu/dual_stage_iommu.c b/hw/iommu/dual_stage_iommu.c index be4179d..d5a7168 100644 --- a/hw/iommu/dual_stage_iommu.c +++ b/hw/iommu/dual_stage_iommu.c @@ -48,9 +48,12 @@ int ds_iommu_pasid_free(DualStageIOMMUObject *dsi_obj, uint32_t pasid) } void ds_iommu_object_init(DualStageIOMMUObject *dsi_obj, - DualStageIOMMUOps *ops) + DualStageIOMMUOps *ops, + DualStageIOMMUInfo *uinfo) { dsi_obj->ops = ops; + + dsi_obj->uinfo.pasid_format = uinfo->pasid_format; } void ds_iommu_object_destroy(DualStageIOMMUObject *dsi_obj) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index fc1723d..a07824b 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -1182,10 +1182,84 @@ static int vfio_get_iommu_type(VFIOContainer *container, static struct DualStageIOMMUOps vfio_ds_iommu_ops = { }; +static int vfio_get_iommu_info(VFIOContainer *container, + struct vfio_iommu_type1_info **info) +{ + + size_t argsz = sizeof(struct vfio_iommu_type1_info); + + + *info = g_malloc0(argsz); + +retry: + (*info)->argsz = argsz; + + if (ioctl(container->fd, VFIO_IOMMU_GET_INFO, *info)) { + g_free(*info); + *info = NULL; + return -errno; + } + + if (((*info)->argsz > argsz)) { + argsz = (*info)->argsz; + *info = g_realloc(*info, argsz); + goto retry; + } + + return 0; +} + +static struct vfio_info_cap_header * +vfio_get_iommu_info_cap(struct vfio_iommu_type1_info *info, uint16_t id) +{ + struct vfio_info_cap_header *hdr; + void *ptr = info; + + if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) { + return NULL; + } + + for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) { + if (hdr->id == id) { + return hdr; + } + } + + return NULL; +} + +static int vfio_get_nesting_iommu_format(VFIOContainer *container, + uint32_t *pasid_format) +{ + struct vfio_iommu_type1_info *info; + struct vfio_info_cap_header *hdr; + struct vfio_iommu_type1_info_cap_nesting *cap; + + if (vfio_get_iommu_info(container, &info)) { + return -errno; + } + + hdr = vfio_get_iommu_info_cap(info, + VFIO_IOMMU_TYPE1_INFO_CAP_NESTING); + if (!hdr) { + g_free(info); + return -errno; + } + + cap = container_of(hdr, + struct vfio_iommu_type1_info_cap_nesting, header); + *pasid_format = cap->pasid_format; + + g_free(info); + return 0; +} + static int vfio_init_container(VFIOContainer *container, int group_fd, Error **errp) { int iommu_type, ret; + uint32_t format; + DualStageIOMMUInfo uinfo; iommu_type = vfio_get_iommu_type(container, errp); if (iommu_type < 0) { @@ -1214,7 +1288,16 @@ static int vfio_init_container(VFIOContainer *container, int group_fd, } if (iommu_type == VFIO_TYPE1_NESTING_IOMMU) { - ds_iommu_object_init(&container->dsi_obj, &vfio_ds_iommu_ops); + if (vfio_get_nesting_iommu_format(container, &format)) { + error_setg_errno(errp, errno, + "Failed to get nesting iommu format"); + return -errno; + } + + uinfo.pasid_format = format; + ds_iommu_object_init(&container->dsi_obj, + &vfio_ds_iommu_ops, &uinfo); + if (iommu_context_register_ds_iommu(container->iommu_ctx, &container->dsi_obj)) { /* diff --git a/include/hw/iommu/dual_stage_iommu.h b/include/hw/iommu/dual_stage_iommu.h index e9891e3..c6100b4 100644 --- a/include/hw/iommu/dual_stage_iommu.h +++ b/include/hw/iommu/dual_stage_iommu.h @@ -23,12 +23,14 @@ #define HW_DS_IOMMU_H #include "qemu/queue.h" +#include <linux/iommu.h> #ifndef CONFIG_USER_ONLY #include "exec/hwaddr.h" #endif typedef struct DualStageIOMMUObject DualStageIOMMUObject; typedef struct DualStageIOMMUOps DualStageIOMMUOps; +typedef struct DualStageIOMMUInfo DualStageIOMMUInfo; struct DualStageIOMMUOps { /* Allocate pasid from DualStageIOMMU (a.k.a. host IOMMU) */ @@ -41,11 +43,16 @@ struct DualStageIOMMUOps { uint32_t pasid); }; +struct DualStageIOMMUInfo { + uint32_t pasid_format; +}; + /* * This is an abstraction of Dual-stage IOMMU. */ struct DualStageIOMMUObject { DualStageIOMMUOps *ops; + DualStageIOMMUInfo uinfo; }; int ds_iommu_pasid_alloc(DualStageIOMMUObject *dsi_obj, uint32_t min, @@ -53,7 +60,8 @@ int ds_iommu_pasid_alloc(DualStageIOMMUObject *dsi_obj, uint32_t min, int ds_iommu_pasid_free(DualStageIOMMUObject *dsi_obj, uint32_t pasid); void ds_iommu_object_init(DualStageIOMMUObject *dsi_obj, - DualStageIOMMUOps *ops); + DualStageIOMMUOps *ops, + DualStageIOMMUInfo *uinfo); void ds_iommu_object_destroy(DualStageIOMMUObject *dsi_obj); #endif