Message ID | 1592684486-18511-12-git-send-email-kwankhede@nvidia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add migration support for VFIO devices | expand |
On Sun, 21 Jun 2020 01:51:20 +0530 Kirti Wankhede <kwankhede@nvidia.com> wrote: > Added helper functions to get IOMMU info capability chain. > Added function to get migration capability information from that > capability chain for IOMMU container. > > Similar change was proposed earlier: > https://lists.gnu.org/archive/html/qemu-devel/2018-05/msg03759.html > > Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > Cc: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com> > Cc: Eric Auger <eric.auger@redhat.com> > --- > hw/vfio/common.c | 91 +++++++++++++++++++++++++++++++++++++++---- > include/hw/vfio/vfio-common.h | 3 ++ > 2 files changed, 86 insertions(+), 8 deletions(-) > > diff --git a/hw/vfio/common.c b/hw/vfio/common.c > index 90e9a854d82c..e0d3d4585a65 100644 > --- a/hw/vfio/common.c > +++ b/hw/vfio/common.c > @@ -1229,6 +1229,75 @@ static int vfio_init_container(VFIOContainer *container, int group_fd, > return 0; > } > > +static int vfio_get_iommu_info(VFIOContainer *container, > + struct vfio_iommu_type1_info **info) > +{ > + > + size_t argsz = sizeof(struct vfio_iommu_type1_info); > + > + *info = g_new0(struct vfio_iommu_type1_info, 1); > +again: > + (*info)->argsz = argsz; > + > + if (ioctl(container->fd, VFIO_IOMMU_GET_INFO, *info)) { > + g_free(*info); > + *info = NULL; > + return -errno; > + } > + > + if (((*info)->argsz > argsz)) { > + argsz = (*info)->argsz; > + *info = g_realloc(*info, argsz); Do we need to guard against getting a bogus argsz value causing a huge allocation that might fail and crash the program? > + goto again; > + } > + > + return 0; > +} (...) > @@ -1314,15 +1384,20 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, > * existing Type1 IOMMUs generally support any IOVA we're > * going to actually try in practice. > */ > - info.argsz = sizeof(info); > - ret = ioctl(fd, VFIO_IOMMU_GET_INFO, &info); > - /* Ignore errors */ > - if (ret || !(info.flags & VFIO_IOMMU_INFO_PGSIZES)) { > + ret = vfio_get_iommu_info(container, &info); Previously, we ignored errors from the IOMMU_GET_INFO ioctl, now we error out. Was that change intended? > + if (ret) { > + goto free_container_exit; > + } > + > + if (!(info->flags & VFIO_IOMMU_INFO_PGSIZES)) { > /* Assume 4k IOVA page size */ > - info.iova_pgsizes = 4096; > + info->iova_pgsizes = 4096; > } > - vfio_host_win_add(container, 0, (hwaddr)-1, info.iova_pgsizes); > - container->pgsizes = info.iova_pgsizes; > + vfio_host_win_add(container, 0, (hwaddr)-1, info->iova_pgsizes); > + container->pgsizes = info->iova_pgsizes; > + > + vfio_get_iommu_info_migration(container, info); > + g_free(info); > break; > } > case VFIO_SPAPR_TCE_v2_IOMMU: (...)
On Sun, 21 Jun 2020 01:51:20 +0530 Kirti Wankhede <kwankhede@nvidia.com> wrote: > Added helper functions to get IOMMU info capability chain. > Added function to get migration capability information from that > capability chain for IOMMU container. > > Similar change was proposed earlier: > https://lists.gnu.org/archive/html/qemu-devel/2018-05/msg03759.html > > Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > Cc: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com> > Cc: Eric Auger <eric.auger@redhat.com> > --- > hw/vfio/common.c | 91 +++++++++++++++++++++++++++++++++++++++---- > include/hw/vfio/vfio-common.h | 3 ++ > 2 files changed, 86 insertions(+), 8 deletions(-) > > diff --git a/hw/vfio/common.c b/hw/vfio/common.c > index 90e9a854d82c..e0d3d4585a65 100644 > --- a/hw/vfio/common.c > +++ b/hw/vfio/common.c > @@ -1229,6 +1229,75 @@ static int vfio_init_container(VFIOContainer *container, int group_fd, > return 0; > } > > +static int vfio_get_iommu_info(VFIOContainer *container, > + struct vfio_iommu_type1_info **info) > +{ > + > + size_t argsz = sizeof(struct vfio_iommu_type1_info); > + > + *info = g_new0(struct vfio_iommu_type1_info, 1); > +again: > + (*info)->argsz = argsz; > + > + if (ioctl(container->fd, VFIO_IOMMU_GET_INFO, *info)) { > + g_free(*info); > + *info = NULL; > + return -errno; > + } > + > + if (((*info)->argsz > argsz)) { > + argsz = (*info)->argsz; > + *info = g_realloc(*info, argsz); > + goto again; > + } > + > + return 0; > +} > + > +static struct vfio_info_cap_header * > +vfio_get_iommu_info_cap(struct vfio_iommu_type1_info *info, uint16_t id) > +{ > + struct vfio_info_cap_header *hdr; > + void *ptr = info; > + > + if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) { > + return NULL; > + } > + > + for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) { > + if (hdr->id == id) { > + return hdr; > + } > + } > + > + return NULL; > +} > + > +static void vfio_get_iommu_info_migration(VFIOContainer *container, > + struct vfio_iommu_type1_info *info) > +{ > + struct vfio_info_cap_header *hdr; > + struct vfio_iommu_type1_info_cap_migration *cap_mig; > + > + hdr = vfio_get_iommu_info_cap(info, VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION); > + if (!hdr) { > + return; > + } > + > + cap_mig = container_of(hdr, struct vfio_iommu_type1_info_cap_migration, > + header); > + > + container->dirty_pages_supported = true; > + container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; > + container->dirty_pgsizes = cap_mig->pgsize_bitmap; > + > + /* > + * cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of > + * TARGET_PAGE_SIZE to mark those dirty. > + */ > + assert(container->dirty_pgsizes & TARGET_PAGE_SIZE); Why assert versus simply not support dirty page tracking and therefore migration of contained devices? > +} > + > static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, > Error **errp) > { > @@ -1293,6 +1362,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, > container->space = space; > container->fd = fd; > container->error = NULL; > + container->dirty_pages_supported = false; > QLIST_INIT(&container->giommu_list); > QLIST_INIT(&container->hostwin_list); > > @@ -1305,7 +1375,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, > case VFIO_TYPE1v2_IOMMU: > case VFIO_TYPE1_IOMMU: > { > - struct vfio_iommu_type1_info info; > + struct vfio_iommu_type1_info *info; > > /* > * FIXME: This assumes that a Type1 IOMMU can map any 64-bit > @@ -1314,15 +1384,20 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, > * existing Type1 IOMMUs generally support any IOVA we're > * going to actually try in practice. > */ > - info.argsz = sizeof(info); > - ret = ioctl(fd, VFIO_IOMMU_GET_INFO, &info); > - /* Ignore errors */ > - if (ret || !(info.flags & VFIO_IOMMU_INFO_PGSIZES)) { > + ret = vfio_get_iommu_info(container, &info); > + if (ret) { > + goto free_container_exit; This was previously not fatal, why is it now? Thanks, Alex > + } > + > + if (!(info->flags & VFIO_IOMMU_INFO_PGSIZES)) { > /* Assume 4k IOVA page size */ > - info.iova_pgsizes = 4096; > + info->iova_pgsizes = 4096; > } > - vfio_host_win_add(container, 0, (hwaddr)-1, info.iova_pgsizes); > - container->pgsizes = info.iova_pgsizes; > + vfio_host_win_add(container, 0, (hwaddr)-1, info->iova_pgsizes); > + container->pgsizes = info->iova_pgsizes; > + > + vfio_get_iommu_info_migration(container, info); > + g_free(info); > break; > } > case VFIO_SPAPR_TCE_v2_IOMMU: > diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > index c78033e4149d..5a57a78ec517 100644 > --- a/include/hw/vfio/vfio-common.h > +++ b/include/hw/vfio/vfio-common.h > @@ -79,6 +79,9 @@ typedef struct VFIOContainer { > unsigned iommu_type; > Error *error; > bool initialized; > + bool dirty_pages_supported; > + uint64_t dirty_pgsizes; > + uint64_t max_dirty_bitmap_size; > unsigned long pgsizes; > QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; > QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
On 6/25/2020 12:25 AM, Alex Williamson wrote: > On Sun, 21 Jun 2020 01:51:20 +0530 > Kirti Wankhede <kwankhede@nvidia.com> wrote: > >> Added helper functions to get IOMMU info capability chain. >> Added function to get migration capability information from that >> capability chain for IOMMU container. >> >> Similar change was proposed earlier: >> https://lists.gnu.org/archive/html/qemu-devel/2018-05/msg03759.html >> >> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> >> Cc: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com> >> Cc: Eric Auger <eric.auger@redhat.com> >> --- >> hw/vfio/common.c | 91 +++++++++++++++++++++++++++++++++++++++---- >> include/hw/vfio/vfio-common.h | 3 ++ >> 2 files changed, 86 insertions(+), 8 deletions(-) >> >> diff --git a/hw/vfio/common.c b/hw/vfio/common.c >> index 90e9a854d82c..e0d3d4585a65 100644 >> --- a/hw/vfio/common.c >> +++ b/hw/vfio/common.c >> @@ -1229,6 +1229,75 @@ static int vfio_init_container(VFIOContainer *container, int group_fd, >> return 0; >> } >> >> +static int vfio_get_iommu_info(VFIOContainer *container, >> + struct vfio_iommu_type1_info **info) >> +{ >> + >> + size_t argsz = sizeof(struct vfio_iommu_type1_info); >> + >> + *info = g_new0(struct vfio_iommu_type1_info, 1); >> +again: >> + (*info)->argsz = argsz; >> + >> + if (ioctl(container->fd, VFIO_IOMMU_GET_INFO, *info)) { >> + g_free(*info); >> + *info = NULL; >> + return -errno; >> + } >> + >> + if (((*info)->argsz > argsz)) { >> + argsz = (*info)->argsz; >> + *info = g_realloc(*info, argsz); >> + goto again; >> + } >> + >> + return 0; >> +} >> + >> +static struct vfio_info_cap_header * >> +vfio_get_iommu_info_cap(struct vfio_iommu_type1_info *info, uint16_t id) >> +{ >> + struct vfio_info_cap_header *hdr; >> + void *ptr = info; >> + >> + if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) { >> + return NULL; >> + } >> + >> + for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) { >> + if (hdr->id == id) { >> + return hdr; >> + } >> + } >> + >> + return NULL; >> +} >> + >> +static void vfio_get_iommu_info_migration(VFIOContainer *container, >> + struct vfio_iommu_type1_info *info) >> +{ >> + struct vfio_info_cap_header *hdr; >> + struct vfio_iommu_type1_info_cap_migration *cap_mig; >> + >> + hdr = vfio_get_iommu_info_cap(info, VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION); >> + if (!hdr) { >> + return; >> + } >> + >> + cap_mig = container_of(hdr, struct vfio_iommu_type1_info_cap_migration, >> + header); >> + >> + container->dirty_pages_supported = true; >> + container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; >> + container->dirty_pgsizes = cap_mig->pgsize_bitmap; >> + >> + /* >> + * cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of >> + * TARGET_PAGE_SIZE to mark those dirty. >> + */ >> + assert(container->dirty_pgsizes & TARGET_PAGE_SIZE); > > Why assert versus simply not support dirty page tracking and therefore > migration of contained devices? > Ok, that can be done. >> +} >> + >> static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, >> Error **errp) >> { >> @@ -1293,6 +1362,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, >> container->space = space; >> container->fd = fd; >> container->error = NULL; >> + container->dirty_pages_supported = false; >> QLIST_INIT(&container->giommu_list); >> QLIST_INIT(&container->hostwin_list); >> >> @@ -1305,7 +1375,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, >> case VFIO_TYPE1v2_IOMMU: >> case VFIO_TYPE1_IOMMU: >> { >> - struct vfio_iommu_type1_info info; >> + struct vfio_iommu_type1_info *info; >> >> /* >> * FIXME: This assumes that a Type1 IOMMU can map any 64-bit >> @@ -1314,15 +1384,20 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, >> * existing Type1 IOMMUs generally support any IOVA we're >> * going to actually try in practice. >> */ >> - info.argsz = sizeof(info); >> - ret = ioctl(fd, VFIO_IOMMU_GET_INFO, &info); >> - /* Ignore errors */ >> - if (ret || !(info.flags & VFIO_IOMMU_INFO_PGSIZES)) { >> + ret = vfio_get_iommu_info(container, &info); >> + if (ret) { >> + goto free_container_exit; > > This was previously not fatal, why is it now? Thanks, > Cornelia asked same question. Then what should be the action if ioctl fails? Disable migration? Thanks, Kirti > Alex > >> + } >> + >> + if (!(info->flags & VFIO_IOMMU_INFO_PGSIZES)) { >> /* Assume 4k IOVA page size */ >> - info.iova_pgsizes = 4096; >> + info->iova_pgsizes = 4096; >> } >> - vfio_host_win_add(container, 0, (hwaddr)-1, info.iova_pgsizes); >> - container->pgsizes = info.iova_pgsizes; >> + vfio_host_win_add(container, 0, (hwaddr)-1, info->iova_pgsizes); >> + container->pgsizes = info->iova_pgsizes; >> + >> + vfio_get_iommu_info_migration(container, info); >> + g_free(info); >> break; >> } >> case VFIO_SPAPR_TCE_v2_IOMMU: >> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h >> index c78033e4149d..5a57a78ec517 100644 >> --- a/include/hw/vfio/vfio-common.h >> +++ b/include/hw/vfio/vfio-common.h >> @@ -79,6 +79,9 @@ typedef struct VFIOContainer { >> unsigned iommu_type; >> Error *error; >> bool initialized; >> + bool dirty_pages_supported; >> + uint64_t dirty_pgsizes; >> + uint64_t max_dirty_bitmap_size; >> unsigned long pgsizes; >> QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; >> QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; >
On Thu, 25 Jun 2020 19:39:08 +0530 Kirti Wankhede <kwankhede@nvidia.com> wrote: > On 6/25/2020 12:25 AM, Alex Williamson wrote: > > On Sun, 21 Jun 2020 01:51:20 +0530 > > Kirti Wankhede <kwankhede@nvidia.com> wrote: > > > >> Added helper functions to get IOMMU info capability chain. > >> Added function to get migration capability information from that > >> capability chain for IOMMU container. > >> > >> Similar change was proposed earlier: > >> https://lists.gnu.org/archive/html/qemu-devel/2018-05/msg03759.html > >> > >> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > >> Cc: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com> > >> Cc: Eric Auger <eric.auger@redhat.com> > >> --- > >> hw/vfio/common.c | 91 +++++++++++++++++++++++++++++++++++++++---- > >> include/hw/vfio/vfio-common.h | 3 ++ > >> 2 files changed, 86 insertions(+), 8 deletions(-) > >> > >> diff --git a/hw/vfio/common.c b/hw/vfio/common.c > >> index 90e9a854d82c..e0d3d4585a65 100644 > >> --- a/hw/vfio/common.c > >> +++ b/hw/vfio/common.c > >> @@ -1229,6 +1229,75 @@ static int vfio_init_container(VFIOContainer *container, int group_fd, > >> return 0; > >> } > >> > >> +static int vfio_get_iommu_info(VFIOContainer *container, > >> + struct vfio_iommu_type1_info **info) > >> +{ > >> + > >> + size_t argsz = sizeof(struct vfio_iommu_type1_info); > >> + > >> + *info = g_new0(struct vfio_iommu_type1_info, 1); > >> +again: > >> + (*info)->argsz = argsz; > >> + > >> + if (ioctl(container->fd, VFIO_IOMMU_GET_INFO, *info)) { > >> + g_free(*info); > >> + *info = NULL; > >> + return -errno; > >> + } > >> + > >> + if (((*info)->argsz > argsz)) { > >> + argsz = (*info)->argsz; > >> + *info = g_realloc(*info, argsz); > >> + goto again; > >> + } > >> + > >> + return 0; > >> +} > >> + > >> +static struct vfio_info_cap_header * > >> +vfio_get_iommu_info_cap(struct vfio_iommu_type1_info *info, uint16_t id) > >> +{ > >> + struct vfio_info_cap_header *hdr; > >> + void *ptr = info; > >> + > >> + if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) { > >> + return NULL; > >> + } > >> + > >> + for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) { > >> + if (hdr->id == id) { > >> + return hdr; > >> + } > >> + } > >> + > >> + return NULL; > >> +} > >> + > >> +static void vfio_get_iommu_info_migration(VFIOContainer *container, > >> + struct vfio_iommu_type1_info *info) > >> +{ > >> + struct vfio_info_cap_header *hdr; > >> + struct vfio_iommu_type1_info_cap_migration *cap_mig; > >> + > >> + hdr = vfio_get_iommu_info_cap(info, VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION); > >> + if (!hdr) { > >> + return; > >> + } > >> + > >> + cap_mig = container_of(hdr, struct vfio_iommu_type1_info_cap_migration, > >> + header); > >> + > >> + container->dirty_pages_supported = true; > >> + container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; > >> + container->dirty_pgsizes = cap_mig->pgsize_bitmap; > >> + > >> + /* > >> + * cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of > >> + * TARGET_PAGE_SIZE to mark those dirty. > >> + */ > >> + assert(container->dirty_pgsizes & TARGET_PAGE_SIZE); > > > > Why assert versus simply not support dirty page tracking and therefore > > migration of contained devices? > > > > Ok, that can be done. > > >> +} > >> + > >> static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, > >> Error **errp) > >> { > >> @@ -1293,6 +1362,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, > >> container->space = space; > >> container->fd = fd; > >> container->error = NULL; > >> + container->dirty_pages_supported = false; > >> QLIST_INIT(&container->giommu_list); > >> QLIST_INIT(&container->hostwin_list); > >> > >> @@ -1305,7 +1375,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, > >> case VFIO_TYPE1v2_IOMMU: > >> case VFIO_TYPE1_IOMMU: > >> { > >> - struct vfio_iommu_type1_info info; > >> + struct vfio_iommu_type1_info *info; > >> > >> /* > >> * FIXME: This assumes that a Type1 IOMMU can map any 64-bit > >> @@ -1314,15 +1384,20 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, > >> * existing Type1 IOMMUs generally support any IOVA we're > >> * going to actually try in practice. > >> */ > >> - info.argsz = sizeof(info); > >> - ret = ioctl(fd, VFIO_IOMMU_GET_INFO, &info); > >> - /* Ignore errors */ > >> - if (ret || !(info.flags & VFIO_IOMMU_INFO_PGSIZES)) { > >> + ret = vfio_get_iommu_info(container, &info); > >> + if (ret) { > >> + goto free_container_exit; > > > > This was previously not fatal, why is it now? Thanks, > > > > Cornelia asked same question. > Then what should be the action if ioctl fails? Disable migration? Yes, new features shouldn't impose constraints not previously present. Thanks, Alex > >> + } > >> + > >> + if (!(info->flags & VFIO_IOMMU_INFO_PGSIZES)) { > >> /* Assume 4k IOVA page size */ > >> - info.iova_pgsizes = 4096; > >> + info->iova_pgsizes = 4096; > >> } > >> - vfio_host_win_add(container, 0, (hwaddr)-1, info.iova_pgsizes); > >> - container->pgsizes = info.iova_pgsizes; > >> + vfio_host_win_add(container, 0, (hwaddr)-1, info->iova_pgsizes); > >> + container->pgsizes = info->iova_pgsizes; > >> + > >> + vfio_get_iommu_info_migration(container, info); > >> + g_free(info); > >> break; > >> } > >> case VFIO_SPAPR_TCE_v2_IOMMU: > >> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > >> index c78033e4149d..5a57a78ec517 100644 > >> --- a/include/hw/vfio/vfio-common.h > >> +++ b/include/hw/vfio/vfio-common.h > >> @@ -79,6 +79,9 @@ typedef struct VFIOContainer { > >> unsigned iommu_type; > >> Error *error; > >> bool initialized; > >> + bool dirty_pages_supported; > >> + uint64_t dirty_pgsizes; > >> + uint64_t max_dirty_bitmap_size; > >> unsigned long pgsizes; > >> QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; > >> QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; > > >
diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 90e9a854d82c..e0d3d4585a65 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -1229,6 +1229,75 @@ static int vfio_init_container(VFIOContainer *container, int group_fd, return 0; } +static int vfio_get_iommu_info(VFIOContainer *container, + struct vfio_iommu_type1_info **info) +{ + + size_t argsz = sizeof(struct vfio_iommu_type1_info); + + *info = g_new0(struct vfio_iommu_type1_info, 1); +again: + (*info)->argsz = argsz; + + if (ioctl(container->fd, VFIO_IOMMU_GET_INFO, *info)) { + g_free(*info); + *info = NULL; + return -errno; + } + + if (((*info)->argsz > argsz)) { + argsz = (*info)->argsz; + *info = g_realloc(*info, argsz); + goto again; + } + + return 0; +} + +static struct vfio_info_cap_header * +vfio_get_iommu_info_cap(struct vfio_iommu_type1_info *info, uint16_t id) +{ + struct vfio_info_cap_header *hdr; + void *ptr = info; + + if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) { + return NULL; + } + + for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) { + if (hdr->id == id) { + return hdr; + } + } + + return NULL; +} + +static void vfio_get_iommu_info_migration(VFIOContainer *container, + struct vfio_iommu_type1_info *info) +{ + struct vfio_info_cap_header *hdr; + struct vfio_iommu_type1_info_cap_migration *cap_mig; + + hdr = vfio_get_iommu_info_cap(info, VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION); + if (!hdr) { + return; + } + + cap_mig = container_of(hdr, struct vfio_iommu_type1_info_cap_migration, + header); + + container->dirty_pages_supported = true; + container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; + container->dirty_pgsizes = cap_mig->pgsize_bitmap; + + /* + * cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of + * TARGET_PAGE_SIZE to mark those dirty. + */ + assert(container->dirty_pgsizes & TARGET_PAGE_SIZE); +} + static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, Error **errp) { @@ -1293,6 +1362,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, container->space = space; container->fd = fd; container->error = NULL; + container->dirty_pages_supported = false; QLIST_INIT(&container->giommu_list); QLIST_INIT(&container->hostwin_list); @@ -1305,7 +1375,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, case VFIO_TYPE1v2_IOMMU: case VFIO_TYPE1_IOMMU: { - struct vfio_iommu_type1_info info; + struct vfio_iommu_type1_info *info; /* * FIXME: This assumes that a Type1 IOMMU can map any 64-bit @@ -1314,15 +1384,20 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, * existing Type1 IOMMUs generally support any IOVA we're * going to actually try in practice. */ - info.argsz = sizeof(info); - ret = ioctl(fd, VFIO_IOMMU_GET_INFO, &info); - /* Ignore errors */ - if (ret || !(info.flags & VFIO_IOMMU_INFO_PGSIZES)) { + ret = vfio_get_iommu_info(container, &info); + if (ret) { + goto free_container_exit; + } + + if (!(info->flags & VFIO_IOMMU_INFO_PGSIZES)) { /* Assume 4k IOVA page size */ - info.iova_pgsizes = 4096; + info->iova_pgsizes = 4096; } - vfio_host_win_add(container, 0, (hwaddr)-1, info.iova_pgsizes); - container->pgsizes = info.iova_pgsizes; + vfio_host_win_add(container, 0, (hwaddr)-1, info->iova_pgsizes); + container->pgsizes = info->iova_pgsizes; + + vfio_get_iommu_info_migration(container, info); + g_free(info); break; } case VFIO_SPAPR_TCE_v2_IOMMU: diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index c78033e4149d..5a57a78ec517 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -79,6 +79,9 @@ typedef struct VFIOContainer { unsigned iommu_type; Error *error; bool initialized; + bool dirty_pages_supported; + uint64_t dirty_pgsizes; + uint64_t max_dirty_bitmap_size; unsigned long pgsizes; QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
Added helper functions to get IOMMU info capability chain. Added function to get migration capability information from that capability chain for IOMMU container. Similar change was proposed earlier: https://lists.gnu.org/archive/html/qemu-devel/2018-05/msg03759.html Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> Cc: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com> Cc: Eric Auger <eric.auger@redhat.com> --- hw/vfio/common.c | 91 +++++++++++++++++++++++++++++++++++++++---- include/hw/vfio/vfio-common.h | 3 ++ 2 files changed, 86 insertions(+), 8 deletions(-)