diff mbox series

[v4,09/14] vfio-user: handle DMA mappings

Message ID 2c9baf82a342cfc4ff3d35e017908b9050faf409.1639549843.git.jag.raman@oracle.com (mailing list archive)
State New, archived
Headers show
Series vfio-user server in QEMU | expand

Commit Message

Jag Raman Dec. 15, 2021, 3:35 p.m. UTC
Define and register callbacks to manage the RAM regions used for
device DMA

Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/remote/vfio-user-obj.c | 48 +++++++++++++++++++++++++++++++++++++++
 hw/remote/trace-events    |  2 ++
 2 files changed, 50 insertions(+)

Comments

Stefan Hajnoczi Dec. 16, 2021, 1:24 p.m. UTC | #1
On Wed, Dec 15, 2021 at 10:35:33AM -0500, Jagannathan Raman wrote:
> Define and register callbacks to manage the RAM regions used for
> device DMA
> 
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>  hw/remote/vfio-user-obj.c | 48 +++++++++++++++++++++++++++++++++++++++
>  hw/remote/trace-events    |  2 ++
>  2 files changed, 50 insertions(+)
> 
> diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
> index c6d0c675b7..46f2251a68 100644
> --- a/hw/remote/vfio-user-obj.c
> +++ b/hw/remote/vfio-user-obj.c
> @@ -208,6 +208,47 @@ static ssize_t vfu_object_cfg_access(vfu_ctx_t *vfu_ctx, char * const buf,
>      return count;
>  }
>  
> +static void dma_register(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
> +{
> +    MemoryRegion *subregion = NULL;
> +    g_autofree char *name = NULL;
> +    static unsigned int suffix;
> +    struct iovec *iov = &info->iova;
> +
> +    if (!info->vaddr) {
> +        return;
> +    }
> +
> +    name = g_strdup_printf("remote-mem-%u", suffix++);
> +
> +    subregion = g_new0(MemoryRegion, 1);
> +
> +    memory_region_init_ram_ptr(subregion, NULL, name,
> +                               iov->iov_len, info->vaddr);
> +
> +    memory_region_add_subregion(get_system_memory(), (hwaddr)iov->iov_base,
> +                                subregion);
> +
> +    trace_vfu_dma_register((uint64_t)iov->iov_base, iov->iov_len);
> +}
> +
> +static void dma_unregister(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
> +{
> +    MemoryRegion *mr = NULL;
> +    ram_addr_t offset;
> +
> +    mr = memory_region_from_host(info->vaddr, &offset);
> +    if (!mr) {
> +        return;
> +    }
> +
> +    memory_region_del_subregion(get_system_memory(), mr);
> +
> +    object_unparent((OBJECT(mr)));
> +
> +    trace_vfu_dma_unregister((uint64_t)info->iova.iov_base);
> +}

This does not support hot unplug (memory regions pointing to memory
mapped by libvfio-user are left registered). The code should keep a list
(e.g. https://docs.gtk.org/glib/struct.SList.html) of memory regions and
automatically remove them before destroying the vfu context.

It also doesn't support multiple vfio-user server instances running in
the same QEMU process. get_system_memory() is global but the memory
regions provided by vfio-user are per-client (i.e. VM). If multiple VMs
are connected to one vfio-user server process then they conflict.

I don't know the best way to support multiple vfio-user server
instances, it would be straightforward if QEMU supported multiple
MachineStates and didn't use global get_system_memory()/get_io_memory()
APIs. It would be nice to solve that in the future.

Maybe it's too hard to change that, I haven't looked. An alternative is
to make the x-remote machine empty (it doesn't create any devices) and
instead create a new PCI bus, interrupt controller, memory MemoryRegion,
and io MemoryRegion in VfuObject. Stop using get_system_memory() and
instead use the per-VfuObject memory MemoryRegion.

In either of those approaches it's probably necessary to specify the PCI
bus ID in --device and device_add so it's clear which vfio-user server
the PCI device is associated with.

The multiple vfio-user server instance limitation doesn't need to be
solved now, but I wanted to share some ideas around it. Maybe someone
has better ideas or is aware of limitations preventing what I described.

Stefan
Jag Raman Dec. 17, 2021, 7:11 p.m. UTC | #2
> On Dec 16, 2021, at 8:24 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
> 
> On Wed, Dec 15, 2021 at 10:35:33AM -0500, Jagannathan Raman wrote:
>> Define and register callbacks to manage the RAM regions used for
>> device DMA
>> 
>> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
>> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
>> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
>> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
>> ---
>> hw/remote/vfio-user-obj.c | 48 +++++++++++++++++++++++++++++++++++++++
>> hw/remote/trace-events    |  2 ++
>> 2 files changed, 50 insertions(+)
>> 
>> diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
>> index c6d0c675b7..46f2251a68 100644
>> --- a/hw/remote/vfio-user-obj.c
>> +++ b/hw/remote/vfio-user-obj.c
>> @@ -208,6 +208,47 @@ static ssize_t vfu_object_cfg_access(vfu_ctx_t *vfu_ctx, char * const buf,
>>     return count;
>> }
>> 
>> +static void dma_register(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
>> +{
>> +    MemoryRegion *subregion = NULL;
>> +    g_autofree char *name = NULL;
>> +    static unsigned int suffix;
>> +    struct iovec *iov = &info->iova;
>> +
>> +    if (!info->vaddr) {
>> +        return;
>> +    }
>> +
>> +    name = g_strdup_printf("remote-mem-%u", suffix++);
>> +
>> +    subregion = g_new0(MemoryRegion, 1);
>> +
>> +    memory_region_init_ram_ptr(subregion, NULL, name,
>> +                               iov->iov_len, info->vaddr);
>> +
>> +    memory_region_add_subregion(get_system_memory(), (hwaddr)iov->iov_base,
>> +                                subregion);
>> +
>> +    trace_vfu_dma_register((uint64_t)iov->iov_base, iov->iov_len);
>> +}
>> +
>> +static void dma_unregister(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
>> +{
>> +    MemoryRegion *mr = NULL;
>> +    ram_addr_t offset;
>> +
>> +    mr = memory_region_from_host(info->vaddr, &offset);
>> +    if (!mr) {
>> +        return;
>> +    }
>> +
>> +    memory_region_del_subregion(get_system_memory(), mr);
>> +
>> +    object_unparent((OBJECT(mr)));
>> +
>> +    trace_vfu_dma_unregister((uint64_t)info->iova.iov_base);
>> +}
> 
> This does not support hot unplug (memory regions pointing to memory
> mapped by libvfio-user are left registered). The code should keep a list
> (e.g. https://docs.gtk.org/glib/struct.SList.html) of memory regions and
> automatically remove them before destroying the vfu context.
> 
> It also doesn't support multiple vfio-user server instances running in
> the same QEMU process. get_system_memory() is global but the memory
> regions provided by vfio-user are per-client (i.e. VM). If multiple VMs
> are connected to one vfio-user server process then they conflict.
> 
> I don't know the best way to support multiple vfio-user server
> instances, it would be straightforward if QEMU supported multiple
> MachineStates and didn't use global get_system_memory()/get_io_memory()
> APIs. It would be nice to solve that in the future.

We’ve addressed the multiple vfio-user-server instances in
"[PATCH v4 11/14] vfio-user: IOMMU support for remote device” patch
down the line. I see your comments there, will address them.

Thank you!
--
Jag

> 
> Maybe it's too hard to change that, I haven't looked. An alternative is
> to make the x-remote machine empty (it doesn't create any devices) and
> instead create a new PCI bus, interrupt controller, memory MemoryRegion,
> and io MemoryRegion in VfuObject. Stop using get_system_memory() and
> instead use the per-VfuObject memory MemoryRegion.
> 
> In either of those approaches it's probably necessary to specify the PCI
> bus ID in --device and device_add so it's clear which vfio-user server
> the PCI device is associated with.
> 
> The multiple vfio-user server instance limitation doesn't need to be
> solved now, but I wanted to share some ideas around it. Maybe someone
> has better ideas or is aware of limitations preventing what I described.
> 
> Stefan
diff mbox series

Patch

diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
index c6d0c675b7..46f2251a68 100644
--- a/hw/remote/vfio-user-obj.c
+++ b/hw/remote/vfio-user-obj.c
@@ -208,6 +208,47 @@  static ssize_t vfu_object_cfg_access(vfu_ctx_t *vfu_ctx, char * const buf,
     return count;
 }
 
+static void dma_register(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
+{
+    MemoryRegion *subregion = NULL;
+    g_autofree char *name = NULL;
+    static unsigned int suffix;
+    struct iovec *iov = &info->iova;
+
+    if (!info->vaddr) {
+        return;
+    }
+
+    name = g_strdup_printf("remote-mem-%u", suffix++);
+
+    subregion = g_new0(MemoryRegion, 1);
+
+    memory_region_init_ram_ptr(subregion, NULL, name,
+                               iov->iov_len, info->vaddr);
+
+    memory_region_add_subregion(get_system_memory(), (hwaddr)iov->iov_base,
+                                subregion);
+
+    trace_vfu_dma_register((uint64_t)iov->iov_base, iov->iov_len);
+}
+
+static void dma_unregister(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
+{
+    MemoryRegion *mr = NULL;
+    ram_addr_t offset;
+
+    mr = memory_region_from_host(info->vaddr, &offset);
+    if (!mr) {
+        return;
+    }
+
+    memory_region_del_subregion(get_system_memory(), mr);
+
+    object_unparent((OBJECT(mr)));
+
+    trace_vfu_dma_unregister((uint64_t)info->iova.iov_base);
+}
+
 /*
  * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device'
  * properties. It also depends on devices instantiated in QEMU. These
@@ -289,6 +330,13 @@  static void vfu_object_init_ctx(VfuObject *o, Error **errp)
         goto fail;
     }
 
+    ret = vfu_setup_device_dma(o->vfu_ctx, &dma_register, &dma_unregister);
+    if (ret < 0) {
+        error_setg(errp, "vfu: Failed to setup DMA handlers for %s",
+                   o->device);
+        goto fail;
+    }
+
     ret = vfu_realize_ctx(o->vfu_ctx);
     if (ret < 0) {
         error_setg(errp, "vfu: Failed to realize device %s- %s",
diff --git a/hw/remote/trace-events b/hw/remote/trace-events
index 2ef7884346..f945c7e33b 100644
--- a/hw/remote/trace-events
+++ b/hw/remote/trace-events
@@ -7,3 +7,5 @@  mpqemu_recv_io_error(int cmd, int size, int nfds) "failed to receive %d size %d,
 vfu_prop(const char *prop, const char *val) "vfu: setting %s as %s"
 vfu_cfg_read(uint32_t offset, uint32_t val) "vfu: cfg: 0x%u -> 0x%x"
 vfu_cfg_write(uint32_t offset, uint32_t val) "vfu: cfg: 0x%u <- 0x%x"
+vfu_dma_register(uint64_t gpa, size_t len) "vfu: registering GPA 0x%"PRIx64", %zu bytes"
+vfu_dma_unregister(uint64_t gpa) "vfu: unregistering GPA 0x%"PRIx64""