diff mbox series

[RFC,v3,11/25] vfio: get stage-1 pasid formats from Kernel

Message ID 1580300216-86172-12-git-send-email-yi.l.liu@intel.com (mailing list archive)
State New, archived
Headers show
Series intel_iommu: expose Shared Virtual Addressing to VMs | expand

Commit Message

Yi Liu Jan. 29, 2020, 12:16 p.m. UTC
From: Liu Yi L <yi.l.liu@intel.com>

VFIO checks IOMMU UAPI version when it finds Kernel supports
VFIO_TYPE1_NESTING_IOMMU. It is enough for UAPI compatibility
check. However, IOMMU UAPI may support multiple stage-1 pasid
formats in a specific UAPI version, which is highly possible
since IOMMU UAPI supports stage-1 formats across all IOMMU vendors.
So VFIO needs to get the supported formats from Kernel and tell
vIOMMU. Let vIOMMU select proper format when setup dual stage DMA
translation.

This patch gets the stage-1 pasid format from kernel by using IOCTL
VFIO_IOMMU_GET_INFO and pass the supported format to vIOMMU by the
DualStageIOMMUObject instance which has been registered to vIOMMU.

This patch referred some code from Shameer Kolothum.
https://lists.gnu.org/archive/html/qemu-devel/2018-05/msg03759.html

Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Eric Auger <eric.auger@redhat.com>
Cc: Yi Sun <yi.y.sun@linux.intel.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
---
 hw/iommu/dual_stage_iommu.c         |  5 ++-
 hw/vfio/common.c                    | 85 ++++++++++++++++++++++++++++++++++++-
 include/hw/iommu/dual_stage_iommu.h | 10 ++++-
 3 files changed, 97 insertions(+), 3 deletions(-)

Comments

Peter Xu Feb. 11, 2020, 7:30 p.m. UTC | #1
On Wed, Jan 29, 2020 at 04:16:42AM -0800, Liu, Yi L wrote:
> From: Liu Yi L <yi.l.liu@intel.com>
> 
> VFIO checks IOMMU UAPI version when it finds Kernel supports
> VFIO_TYPE1_NESTING_IOMMU. It is enough for UAPI compatibility
> check. However, IOMMU UAPI may support multiple stage-1 pasid
> formats in a specific UAPI version, which is highly possible
> since IOMMU UAPI supports stage-1 formats across all IOMMU vendors.
> So VFIO needs to get the supported formats from Kernel and tell
> vIOMMU. Let vIOMMU select proper format when setup dual stage DMA
> translation.
> 
> This patch gets the stage-1 pasid format from kernel by using IOCTL
> VFIO_IOMMU_GET_INFO and pass the supported format to vIOMMU by the
> DualStageIOMMUObject instance which has been registered to vIOMMU.
> 
> This patch referred some code from Shameer Kolothum.
> https://lists.gnu.org/archive/html/qemu-devel/2018-05/msg03759.html
> 
> Cc: Kevin Tian <kevin.tian@intel.com>
> Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Cc: Peter Xu <peterx@redhat.com>
> Cc: Eric Auger <eric.auger@redhat.com>
> Cc: Yi Sun <yi.y.sun@linux.intel.com>
> Cc: David Gibson <david@gibson.dropbear.id.au>
> Cc: Alex Williamson <alex.williamson@redhat.com>
> Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
> Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> ---
>  hw/iommu/dual_stage_iommu.c         |  5 ++-
>  hw/vfio/common.c                    | 85 ++++++++++++++++++++++++++++++++++++-
>  include/hw/iommu/dual_stage_iommu.h | 10 ++++-
>  3 files changed, 97 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/iommu/dual_stage_iommu.c b/hw/iommu/dual_stage_iommu.c
> index be4179d..d5a7168 100644
> --- a/hw/iommu/dual_stage_iommu.c
> +++ b/hw/iommu/dual_stage_iommu.c
> @@ -48,9 +48,12 @@ int ds_iommu_pasid_free(DualStageIOMMUObject *dsi_obj, uint32_t pasid)
>  }
>  
>  void ds_iommu_object_init(DualStageIOMMUObject *dsi_obj,
> -                          DualStageIOMMUOps *ops)
> +                          DualStageIOMMUOps *ops,
> +                          DualStageIOMMUInfo *uinfo)
>  {
>      dsi_obj->ops = ops;
> +
> +    dsi_obj->uinfo.pasid_format = uinfo->pasid_format;
>  }
>  
>  void ds_iommu_object_destroy(DualStageIOMMUObject *dsi_obj)
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index fc1723d..a07824b 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -1182,10 +1182,84 @@ static int vfio_get_iommu_type(VFIOContainer *container,
>  static struct DualStageIOMMUOps vfio_ds_iommu_ops = {
>  };
>  
> +static int vfio_get_iommu_info(VFIOContainer *container,
> +                         struct vfio_iommu_type1_info **info)

Better comment on the function to remember to free(*info) after use
for the callers.

> +{
> +
> +    size_t argsz = sizeof(struct vfio_iommu_type1_info);
> +

Nit: extra newline.

> +
> +    *info = g_malloc0(argsz);
> +
> +retry:
> +    (*info)->argsz = argsz;
> +
> +    if (ioctl(container->fd, VFIO_IOMMU_GET_INFO, *info)) {
> +        g_free(*info);
> +        *info = NULL;
> +        return -errno;
> +    }
> +
> +    if (((*info)->argsz > argsz)) {
> +        argsz = (*info)->argsz;
> +        *info = g_realloc(*info, argsz);
> +        goto retry;
> +    }
> +
> +    return 0;
> +}
> +
> +static struct vfio_info_cap_header *
> +vfio_get_iommu_info_cap(struct vfio_iommu_type1_info *info, uint16_t id)
> +{
> +    struct vfio_info_cap_header *hdr;
> +    void *ptr = info;
> +
> +    if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) {
> +        return NULL;
> +    }
> +
> +    for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) {
> +        if (hdr->id == id) {
> +            return hdr;
> +        }
> +    }
> +
> +    return NULL;
> +}
> +
> +static int vfio_get_nesting_iommu_format(VFIOContainer *container,
> +                                         uint32_t *pasid_format)
> +{
> +    struct vfio_iommu_type1_info *info;
> +    struct vfio_info_cap_header *hdr;
> +    struct vfio_iommu_type1_info_cap_nesting *cap;
> +
> +    if (vfio_get_iommu_info(container, &info)) {
> +        return -errno;

Should return the retcode from vfio_get_iommu_info.

> +    }
> +
> +    hdr = vfio_get_iommu_info_cap(info,
> +                        VFIO_IOMMU_TYPE1_INFO_CAP_NESTING);
> +    if (!hdr) {
> +        g_free(info);
> +        return -errno;
> +    }
> +
> +    cap = container_of(hdr,
> +                struct vfio_iommu_type1_info_cap_nesting, header);
> +    *pasid_format = cap->pasid_format;
> +
> +    g_free(info);
> +    return 0;
> +}
> +
>  static int vfio_init_container(VFIOContainer *container, int group_fd,
>                                 Error **errp)
>  {
>      int iommu_type, ret;
> +    uint32_t format;
> +    DualStageIOMMUInfo uinfo;
>  
>      iommu_type = vfio_get_iommu_type(container, errp);
>      if (iommu_type < 0) {
> @@ -1214,7 +1288,16 @@ static int vfio_init_container(VFIOContainer *container, int group_fd,
>      }
>  
>      if (iommu_type == VFIO_TYPE1_NESTING_IOMMU) {
> -        ds_iommu_object_init(&container->dsi_obj, &vfio_ds_iommu_ops);
> +        if (vfio_get_nesting_iommu_format(container, &format)) {
> +            error_setg_errno(errp, errno,
> +                             "Failed to get nesting iommu format");
> +            return -errno;

Same here, you might want to return the retcode from
vfio_get_nesting_iommu_format()?

> +        }
> +
> +        uinfo.pasid_format = format;
> +        ds_iommu_object_init(&container->dsi_obj,
> +                             &vfio_ds_iommu_ops, &uinfo);
> +
>          if (iommu_context_register_ds_iommu(container->iommu_ctx,
>                                              &container->dsi_obj)) {
>              /*
> diff --git a/include/hw/iommu/dual_stage_iommu.h b/include/hw/iommu/dual_stage_iommu.h
> index e9891e3..c6100b4 100644
> --- a/include/hw/iommu/dual_stage_iommu.h
> +++ b/include/hw/iommu/dual_stage_iommu.h
> @@ -23,12 +23,14 @@
>  #define HW_DS_IOMMU_H
>  
>  #include "qemu/queue.h"
> +#include <linux/iommu.h>
>  #ifndef CONFIG_USER_ONLY
>  #include "exec/hwaddr.h"
>  #endif
>  
>  typedef struct DualStageIOMMUObject DualStageIOMMUObject;
>  typedef struct DualStageIOMMUOps DualStageIOMMUOps;
> +typedef struct DualStageIOMMUInfo DualStageIOMMUInfo;
>  
>  struct DualStageIOMMUOps {
>      /* Allocate pasid from DualStageIOMMU (a.k.a. host IOMMU) */
> @@ -41,11 +43,16 @@ struct DualStageIOMMUOps {
>                        uint32_t pasid);
>  };
>  
> +struct DualStageIOMMUInfo {
> +    uint32_t pasid_format;
> +};
> +
>  /*
>   * This is an abstraction of Dual-stage IOMMU.
>   */
>  struct DualStageIOMMUObject {
>      DualStageIOMMUOps *ops;
> +    DualStageIOMMUInfo uinfo;
>  };
>  
>  int ds_iommu_pasid_alloc(DualStageIOMMUObject *dsi_obj, uint32_t min,
> @@ -53,7 +60,8 @@ int ds_iommu_pasid_alloc(DualStageIOMMUObject *dsi_obj, uint32_t min,
>  int ds_iommu_pasid_free(DualStageIOMMUObject *dsi_obj, uint32_t pasid);
>  
>  void ds_iommu_object_init(DualStageIOMMUObject *dsi_obj,
> -                          DualStageIOMMUOps *ops);
> +                          DualStageIOMMUOps *ops,
> +                          DualStageIOMMUInfo *uinfo);
>  void ds_iommu_object_destroy(DualStageIOMMUObject *dsi_obj);
>  
>  #endif
> -- 
> 2.7.4
>
Yi Liu Feb. 12, 2020, 7:19 a.m. UTC | #2
> From: Peter Xu <peterx@redhat.com>
> Sent: Wednesday, February 12, 2020 3:30 AM
> To: Liu, Yi L <yi.l.liu@intel.com>
> Subject: Re: [RFC v3 11/25] vfio: get stage-1 pasid formats from Kernel
> 
> On Wed, Jan 29, 2020 at 04:16:42AM -0800, Liu, Yi L wrote:
> > From: Liu Yi L <yi.l.liu@intel.com>
> >
> > VFIO checks IOMMU UAPI version when it finds Kernel supports
> > VFIO_TYPE1_NESTING_IOMMU. It is enough for UAPI compatibility check.
> > However, IOMMU UAPI may support multiple stage-1 pasid formats in a
> > specific UAPI version, which is highly possible since IOMMU UAPI
> > supports stage-1 formats across all IOMMU vendors.
> > So VFIO needs to get the supported formats from Kernel and tell
> > vIOMMU. Let vIOMMU select proper format when setup dual stage DMA
> > translation.
> >
> > This patch gets the stage-1 pasid format from kernel by using IOCTL
> > VFIO_IOMMU_GET_INFO and pass the supported format to vIOMMU by the
> > DualStageIOMMUObject instance which has been registered to vIOMMU.
> >
> > This patch referred some code from Shameer Kolothum.
> > https://lists.gnu.org/archive/html/qemu-devel/2018-05/msg03759.html
> >
> > Cc: Kevin Tian <kevin.tian@intel.com>
> > Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > Cc: Peter Xu <peterx@redhat.com>
> > Cc: Eric Auger <eric.auger@redhat.com>
> > Cc: Yi Sun <yi.y.sun@linux.intel.com>
> > Cc: David Gibson <david@gibson.dropbear.id.au>
> > Cc: Alex Williamson <alex.williamson@redhat.com>
> > Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
> > Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> > ---
> >  hw/iommu/dual_stage_iommu.c         |  5 ++-
> >  hw/vfio/common.c                    | 85
> ++++++++++++++++++++++++++++++++++++-
> >  include/hw/iommu/dual_stage_iommu.h | 10 ++++-
> >  3 files changed, 97 insertions(+), 3 deletions(-)
> >
> > diff --git a/hw/iommu/dual_stage_iommu.c
> b/hw/iommu/dual_stage_iommu.c
> > index be4179d..d5a7168 100644
> > --- a/hw/iommu/dual_stage_iommu.c
> > +++ b/hw/iommu/dual_stage_iommu.c
> > @@ -48,9 +48,12 @@ int ds_iommu_pasid_free(DualStageIOMMUObject
> > *dsi_obj, uint32_t pasid)  }
> >
> >  void ds_iommu_object_init(DualStageIOMMUObject *dsi_obj,
> > -                          DualStageIOMMUOps *ops)
> > +                          DualStageIOMMUOps *ops,
> > +                          DualStageIOMMUInfo *uinfo)
> >  {
> >      dsi_obj->ops = ops;
> > +
> > +    dsi_obj->uinfo.pasid_format = uinfo->pasid_format;
> >  }
> >
> >  void ds_iommu_object_destroy(DualStageIOMMUObject *dsi_obj) diff
> > --git a/hw/vfio/common.c b/hw/vfio/common.c index fc1723d..a07824b
> > 100644
> > --- a/hw/vfio/common.c
> > +++ b/hw/vfio/common.c
> > @@ -1182,10 +1182,84 @@ static int vfio_get_iommu_type(VFIOContainer
> > *container,  static struct DualStageIOMMUOps vfio_ds_iommu_ops = {  };
> >
> > +static int vfio_get_iommu_info(VFIOContainer *container,
> > +                         struct vfio_iommu_type1_info **info)
> 
> Better comment on the function to remember to free(*info) after use for the
> callers.

Will do. 
diff mbox series

Patch

diff --git a/hw/iommu/dual_stage_iommu.c b/hw/iommu/dual_stage_iommu.c
index be4179d..d5a7168 100644
--- a/hw/iommu/dual_stage_iommu.c
+++ b/hw/iommu/dual_stage_iommu.c
@@ -48,9 +48,12 @@  int ds_iommu_pasid_free(DualStageIOMMUObject *dsi_obj, uint32_t pasid)
 }
 
 void ds_iommu_object_init(DualStageIOMMUObject *dsi_obj,
-                          DualStageIOMMUOps *ops)
+                          DualStageIOMMUOps *ops,
+                          DualStageIOMMUInfo *uinfo)
 {
     dsi_obj->ops = ops;
+
+    dsi_obj->uinfo.pasid_format = uinfo->pasid_format;
 }
 
 void ds_iommu_object_destroy(DualStageIOMMUObject *dsi_obj)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index fc1723d..a07824b 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1182,10 +1182,84 @@  static int vfio_get_iommu_type(VFIOContainer *container,
 static struct DualStageIOMMUOps vfio_ds_iommu_ops = {
 };
 
+static int vfio_get_iommu_info(VFIOContainer *container,
+                         struct vfio_iommu_type1_info **info)
+{
+
+    size_t argsz = sizeof(struct vfio_iommu_type1_info);
+
+
+    *info = g_malloc0(argsz);
+
+retry:
+    (*info)->argsz = argsz;
+
+    if (ioctl(container->fd, VFIO_IOMMU_GET_INFO, *info)) {
+        g_free(*info);
+        *info = NULL;
+        return -errno;
+    }
+
+    if (((*info)->argsz > argsz)) {
+        argsz = (*info)->argsz;
+        *info = g_realloc(*info, argsz);
+        goto retry;
+    }
+
+    return 0;
+}
+
+static struct vfio_info_cap_header *
+vfio_get_iommu_info_cap(struct vfio_iommu_type1_info *info, uint16_t id)
+{
+    struct vfio_info_cap_header *hdr;
+    void *ptr = info;
+
+    if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) {
+        return NULL;
+    }
+
+    for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) {
+        if (hdr->id == id) {
+            return hdr;
+        }
+    }
+
+    return NULL;
+}
+
+static int vfio_get_nesting_iommu_format(VFIOContainer *container,
+                                         uint32_t *pasid_format)
+{
+    struct vfio_iommu_type1_info *info;
+    struct vfio_info_cap_header *hdr;
+    struct vfio_iommu_type1_info_cap_nesting *cap;
+
+    if (vfio_get_iommu_info(container, &info)) {
+        return -errno;
+    }
+
+    hdr = vfio_get_iommu_info_cap(info,
+                        VFIO_IOMMU_TYPE1_INFO_CAP_NESTING);
+    if (!hdr) {
+        g_free(info);
+        return -errno;
+    }
+
+    cap = container_of(hdr,
+                struct vfio_iommu_type1_info_cap_nesting, header);
+    *pasid_format = cap->pasid_format;
+
+    g_free(info);
+    return 0;
+}
+
 static int vfio_init_container(VFIOContainer *container, int group_fd,
                                Error **errp)
 {
     int iommu_type, ret;
+    uint32_t format;
+    DualStageIOMMUInfo uinfo;
 
     iommu_type = vfio_get_iommu_type(container, errp);
     if (iommu_type < 0) {
@@ -1214,7 +1288,16 @@  static int vfio_init_container(VFIOContainer *container, int group_fd,
     }
 
     if (iommu_type == VFIO_TYPE1_NESTING_IOMMU) {
-        ds_iommu_object_init(&container->dsi_obj, &vfio_ds_iommu_ops);
+        if (vfio_get_nesting_iommu_format(container, &format)) {
+            error_setg_errno(errp, errno,
+                             "Failed to get nesting iommu format");
+            return -errno;
+        }
+
+        uinfo.pasid_format = format;
+        ds_iommu_object_init(&container->dsi_obj,
+                             &vfio_ds_iommu_ops, &uinfo);
+
         if (iommu_context_register_ds_iommu(container->iommu_ctx,
                                             &container->dsi_obj)) {
             /*
diff --git a/include/hw/iommu/dual_stage_iommu.h b/include/hw/iommu/dual_stage_iommu.h
index e9891e3..c6100b4 100644
--- a/include/hw/iommu/dual_stage_iommu.h
+++ b/include/hw/iommu/dual_stage_iommu.h
@@ -23,12 +23,14 @@ 
 #define HW_DS_IOMMU_H
 
 #include "qemu/queue.h"
+#include <linux/iommu.h>
 #ifndef CONFIG_USER_ONLY
 #include "exec/hwaddr.h"
 #endif
 
 typedef struct DualStageIOMMUObject DualStageIOMMUObject;
 typedef struct DualStageIOMMUOps DualStageIOMMUOps;
+typedef struct DualStageIOMMUInfo DualStageIOMMUInfo;
 
 struct DualStageIOMMUOps {
     /* Allocate pasid from DualStageIOMMU (a.k.a. host IOMMU) */
@@ -41,11 +43,16 @@  struct DualStageIOMMUOps {
                       uint32_t pasid);
 };
 
+struct DualStageIOMMUInfo {
+    uint32_t pasid_format;
+};
+
 /*
  * This is an abstraction of Dual-stage IOMMU.
  */
 struct DualStageIOMMUObject {
     DualStageIOMMUOps *ops;
+    DualStageIOMMUInfo uinfo;
 };
 
 int ds_iommu_pasid_alloc(DualStageIOMMUObject *dsi_obj, uint32_t min,
@@ -53,7 +60,8 @@  int ds_iommu_pasid_alloc(DualStageIOMMUObject *dsi_obj, uint32_t min,
 int ds_iommu_pasid_free(DualStageIOMMUObject *dsi_obj, uint32_t pasid);
 
 void ds_iommu_object_init(DualStageIOMMUObject *dsi_obj,
-                          DualStageIOMMUOps *ops);
+                          DualStageIOMMUOps *ops,
+                          DualStageIOMMUInfo *uinfo);
 void ds_iommu_object_destroy(DualStageIOMMUObject *dsi_obj);
 
 #endif