diff mbox series

[3/3] vfio-user: Message-based DMA support

Message ID 20230704080628.852525-4-mnissler@rivosinc.com (mailing list archive)
State New, archived
Headers show
Series Support message-based DMA in vfio-user server | expand

Commit Message

Mattias Nissler July 4, 2023, 8:06 a.m. UTC
Wire up support for DMA for the case where the vfio-user client does not
provide mmap()-able file descriptors, but DMA requests must be performed
via the VFIO-user protocol. This installs an indirect memory region,
which already works for pci_dma_{read,write}, and pci_dma_map works
thanks to the existing DMA bounce buffering support.

Note that while simple scenarios work with this patch, there's a known
race condition in libvfio-user that will mess up the communication
channel: https://github.com/nutanix/libvfio-user/issues/279 I intend to
contribute a fix for this problem, see discussion on the github issue
for more details.

Signed-off-by: Mattias Nissler <mnissler@rivosinc.com>
---
 hw/remote/vfio-user-obj.c | 62 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 55 insertions(+), 7 deletions(-)

Comments

Stefan Hajnoczi July 20, 2023, 6:32 p.m. UTC | #1
On Tue, Jul 04, 2023 at 01:06:27AM -0700, Mattias Nissler wrote:
> Wire up support for DMA for the case where the vfio-user client does not
> provide mmap()-able file descriptors, but DMA requests must be performed
> via the VFIO-user protocol. This installs an indirect memory region,
> which already works for pci_dma_{read,write}, and pci_dma_map works
> thanks to the existing DMA bounce buffering support.
> 
> Note that while simple scenarios work with this patch, there's a known
> race condition in libvfio-user that will mess up the communication
> channel: https://github.com/nutanix/libvfio-user/issues/279 I intend to
> contribute a fix for this problem, see discussion on the github issue
> for more details.
> 
> Signed-off-by: Mattias Nissler <mnissler@rivosinc.com>
> ---
>  hw/remote/vfio-user-obj.c | 62 ++++++++++++++++++++++++++++++++++-----
>  1 file changed, 55 insertions(+), 7 deletions(-)
> 
> diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
> index 8b10c32a3c..9799580c77 100644
> --- a/hw/remote/vfio-user-obj.c
> +++ b/hw/remote/vfio-user-obj.c
> @@ -300,6 +300,53 @@ static ssize_t vfu_object_cfg_access(vfu_ctx_t *vfu_ctx, char * const buf,
>      return count;
>  }
>  
> +static MemTxResult vfu_dma_read(void *opaque, hwaddr addr, uint64_t *val,
> +                                unsigned size, MemTxAttrs attrs)
> +{
> +    MemoryRegion *region = opaque;
> +    VfuObject *o = VFU_OBJECT(region->owner);
> +
> +    dma_sg_t *sg = alloca(dma_sg_size());
> +    vfu_dma_addr_t vfu_addr = (vfu_dma_addr_t)(region->addr + addr);
> +    if (vfu_addr_to_sgl(o->vfu_ctx, vfu_addr, size, sg, 1, PROT_READ) < 0 ||
> +        vfu_sgl_read(o->vfu_ctx, sg, 1, val) != 0) {

Does this work on big-endian host CPUs? It looks like reading 0x12345678
into uint64_t val would result in *val = 0x12345678xxxxxxxx instead of
0x12345678.

> +        return MEMTX_ERROR;
> +    }
> +
> +    return MEMTX_OK;
> +}
> +
> +static MemTxResult vfu_dma_write(void *opaque, hwaddr addr, uint64_t val,
> +                                 unsigned size, MemTxAttrs attrs)
> +{
> +    MemoryRegion *region = opaque;
> +    VfuObject *o = VFU_OBJECT(region->owner);
> +
> +    dma_sg_t *sg = alloca(dma_sg_size());
> +    vfu_dma_addr_t vfu_addr = (vfu_dma_addr_t)(region->addr + addr);
> +    if (vfu_addr_to_sgl(o->vfu_ctx, vfu_addr, size, sg, 1, PROT_WRITE) < 0 ||
> +        vfu_sgl_write(o->vfu_ctx, sg, 1, &val) != 0)  {

Same potential endianness issue here.

Stefan
Mattias Nissler Aug. 23, 2023, 9:28 a.m. UTC | #2
On Thu, Jul 20, 2023 at 8:32 PM Stefan Hajnoczi <stefanha@redhat.com> wrote:
>
> On Tue, Jul 04, 2023 at 01:06:27AM -0700, Mattias Nissler wrote:
> > Wire up support for DMA for the case where the vfio-user client does not
> > provide mmap()-able file descriptors, but DMA requests must be performed
> > via the VFIO-user protocol. This installs an indirect memory region,
> > which already works for pci_dma_{read,write}, and pci_dma_map works
> > thanks to the existing DMA bounce buffering support.
> >
> > Note that while simple scenarios work with this patch, there's a known
> > race condition in libvfio-user that will mess up the communication
> > channel: https://github.com/nutanix/libvfio-user/issues/279 I intend to
> > contribute a fix for this problem, see discussion on the github issue
> > for more details.
> >
> > Signed-off-by: Mattias Nissler <mnissler@rivosinc.com>
> > ---
> >  hw/remote/vfio-user-obj.c | 62 ++++++++++++++++++++++++++++++++++-----
> >  1 file changed, 55 insertions(+), 7 deletions(-)
> >
> > diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
> > index 8b10c32a3c..9799580c77 100644
> > --- a/hw/remote/vfio-user-obj.c
> > +++ b/hw/remote/vfio-user-obj.c
> > @@ -300,6 +300,53 @@ static ssize_t vfu_object_cfg_access(vfu_ctx_t *vfu_ctx, char * const buf,
> >      return count;
> >  }
> >
> > +static MemTxResult vfu_dma_read(void *opaque, hwaddr addr, uint64_t *val,
> > +                                unsigned size, MemTxAttrs attrs)
> > +{
> > +    MemoryRegion *region = opaque;
> > +    VfuObject *o = VFU_OBJECT(region->owner);
> > +
> > +    dma_sg_t *sg = alloca(dma_sg_size());
> > +    vfu_dma_addr_t vfu_addr = (vfu_dma_addr_t)(region->addr + addr);
> > +    if (vfu_addr_to_sgl(o->vfu_ctx, vfu_addr, size, sg, 1, PROT_READ) < 0 ||
> > +        vfu_sgl_read(o->vfu_ctx, sg, 1, val) != 0) {
>
> Does this work on big-endian host CPUs? It looks like reading 0x12345678
> into uint64_t val would result in *val = 0x12345678xxxxxxxx instead of
> 0x12345678.

Ah, good catch, thanks! Confirmed as an issue using a cross-compiled
s390x qemu binary. I will fix this by using ld/st helpers.




>
> > +        return MEMTX_ERROR;
> > +    }
> > +
> > +    return MEMTX_OK;
> > +}
> > +
> > +static MemTxResult vfu_dma_write(void *opaque, hwaddr addr, uint64_t val,
> > +                                 unsigned size, MemTxAttrs attrs)
> > +{
> > +    MemoryRegion *region = opaque;
> > +    VfuObject *o = VFU_OBJECT(region->owner);
> > +
> > +    dma_sg_t *sg = alloca(dma_sg_size());
> > +    vfu_dma_addr_t vfu_addr = (vfu_dma_addr_t)(region->addr + addr);
> > +    if (vfu_addr_to_sgl(o->vfu_ctx, vfu_addr, size, sg, 1, PROT_WRITE) < 0 ||
> > +        vfu_sgl_write(o->vfu_ctx, sg, 1, &val) != 0)  {
>
> Same potential endianness issue here.
>
> Stefan
diff mbox series

Patch

diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
index 8b10c32a3c..9799580c77 100644
--- a/hw/remote/vfio-user-obj.c
+++ b/hw/remote/vfio-user-obj.c
@@ -300,6 +300,53 @@  static ssize_t vfu_object_cfg_access(vfu_ctx_t *vfu_ctx, char * const buf,
     return count;
 }
 
+static MemTxResult vfu_dma_read(void *opaque, hwaddr addr, uint64_t *val,
+                                unsigned size, MemTxAttrs attrs)
+{
+    MemoryRegion *region = opaque;
+    VfuObject *o = VFU_OBJECT(region->owner);
+
+    dma_sg_t *sg = alloca(dma_sg_size());
+    vfu_dma_addr_t vfu_addr = (vfu_dma_addr_t)(region->addr + addr);
+    if (vfu_addr_to_sgl(o->vfu_ctx, vfu_addr, size, sg, 1, PROT_READ) < 0 ||
+        vfu_sgl_read(o->vfu_ctx, sg, 1, val) != 0) {
+        return MEMTX_ERROR;
+    }
+
+    return MEMTX_OK;
+}
+
+static MemTxResult vfu_dma_write(void *opaque, hwaddr addr, uint64_t val,
+                                 unsigned size, MemTxAttrs attrs)
+{
+    MemoryRegion *region = opaque;
+    VfuObject *o = VFU_OBJECT(region->owner);
+
+    dma_sg_t *sg = alloca(dma_sg_size());
+    vfu_dma_addr_t vfu_addr = (vfu_dma_addr_t)(region->addr + addr);
+    if (vfu_addr_to_sgl(o->vfu_ctx, vfu_addr, size, sg, 1, PROT_WRITE) < 0 ||
+        vfu_sgl_write(o->vfu_ctx, sg, 1, &val) != 0)  {
+        return MEMTX_ERROR;
+    }
+
+    return MEMTX_OK;
+}
+
+static const MemoryRegionOps vfu_dma_ops = {
+    .read_with_attrs = vfu_dma_read,
+    .write_with_attrs = vfu_dma_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid = {
+        .min_access_size = 1,
+        .max_access_size = 8,
+        .unaligned = true,
+    },
+    .impl = {
+        .min_access_size = 1,
+        .max_access_size = 8,
+    },
+};
+
 static void dma_register(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
 {
     VfuObject *o = vfu_get_private(vfu_ctx);
@@ -308,17 +355,18 @@  static void dma_register(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
     g_autofree char *name = NULL;
     struct iovec *iov = &info->iova;
 
-    if (!info->vaddr) {
-        return;
-    }
-
     name = g_strdup_printf("mem-%s-%"PRIx64"", o->device,
-                           (uint64_t)info->vaddr);
+                           (uint64_t)iov->iov_base);
 
     subregion = g_new0(MemoryRegion, 1);
 
-    memory_region_init_ram_ptr(subregion, NULL, name,
-                               iov->iov_len, info->vaddr);
+    if (info->vaddr) {
+        memory_region_init_ram_ptr(subregion, OBJECT(o), name,
+                                   iov->iov_len, info->vaddr);
+    } else {
+        memory_region_init_io(subregion, OBJECT(o), &vfu_dma_ops, subregion,
+                              name, iov->iov_len);
+    }
 
     dma_as = pci_device_iommu_address_space(o->pci_dev);