Message ID | 20230704080628.852525-2-mnissler@rivosinc.com (mailing list archive)
---|---
State | New, archived
Series | Support message-based DMA in vfio-user server
On Tue, Jul 04, 2023 at 01:06:25AM -0700, Mattias Nissler wrote:
> It is not uncommon for device models to request mapping of several DMA
> regions at the same time. An example is igb (and probably other net
> devices as well) when a packet is spread across multiple descriptors.
>
> In order to support this when indirect DMA is used, as is the case when
> running the device model in a vfio-server process without mmap()-ed DMA,
> this change allocates DMA bounce buffers dynamically instead of
> supporting only a single buffer.
>
> Signed-off-by: Mattias Nissler <mnissler@rivosinc.com>
> ---
>  softmmu/physmem.c | 74 ++++++++++++++++++++++-------------------------
>  1 file changed, 35 insertions(+), 39 deletions(-)

Is this a functional change or purely a performance optimization? If
it's a performance optimization, please include benchmark results to
justify this change.

QEMU memory allocations must be bounded so that an untrusted guest
cannot cause QEMU to exhaust host memory. There must be a limit to the
amount of bounce buffer memory.

> diff --git a/softmmu/physmem.c b/softmmu/physmem.c
> index bda475a719..56130b5a1d 100644
> --- a/softmmu/physmem.c
> +++ b/softmmu/physmem.c
> @@ -2904,13 +2904,11 @@ void cpu_flush_icache_range(hwaddr start, hwaddr len)
>
>  typedef struct {
>      MemoryRegion *mr;
> -    void *buffer;
>      hwaddr addr;
> -    hwaddr len;
> -    bool in_use;
> +    uint8_t buffer[];
>  } BounceBuffer;
>
> -static BounceBuffer bounce;
> +static size_t bounce_buffers_in_use;
>
>  typedef struct MapClient {
>      QEMUBH *bh;
> @@ -2947,7 +2945,7 @@ void cpu_register_map_client(QEMUBH *bh)
>      QLIST_INSERT_HEAD(&map_client_list, client, link);
>      /* Write map_client_list before reading in_use. */
>      smp_mb();
> -    if (!qatomic_read(&bounce.in_use)) {
> +    if (qatomic_read(&bounce_buffers_in_use)) {
>          cpu_notify_map_clients_locked();
>      }
>      qemu_mutex_unlock(&map_client_list_lock);
> @@ -3076,31 +3074,24 @@ void *address_space_map(AddressSpace *as,
>      RCU_READ_LOCK_GUARD();
>      fv = address_space_to_flatview(as);
>      mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs);
> +    memory_region_ref(mr);
>
>      if (!memory_access_is_direct(mr, is_write)) {
> -        if (qatomic_xchg(&bounce.in_use, true)) {
> -            *plen = 0;
> -            return NULL;
> -        }
> -        /* Avoid unbounded allocations */
> -        l = MIN(l, TARGET_PAGE_SIZE);
> -        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
> -        bounce.addr = addr;
> -        bounce.len = l;
> -
> -        memory_region_ref(mr);
> -        bounce.mr = mr;
> +        qatomic_inc_fetch(&bounce_buffers_in_use);
> +
> +        BounceBuffer *bounce = g_malloc(l + sizeof(BounceBuffer));
> +        bounce->addr = addr;
> +        bounce->mr = mr;
> +
>          if (!is_write) {
>              flatview_read(fv, addr, MEMTXATTRS_UNSPECIFIED,
> -                          bounce.buffer, l);
> +                          bounce->buffer, l);
>          }
>
>          *plen = l;
> -        return bounce.buffer;
> +        return bounce->buffer;

Bounce buffer allocation always succeeds now. Can the
cpu_notify_map_clients*() be removed now that no one is waiting for
bounce buffers anymore?

>      }
>
> -
> -    memory_region_ref(mr);
>      *plen = flatview_extend_translation(fv, addr, len, mr, xlat,
>                                          l, is_write, attrs);
>      fuzz_dma_read_cb(addr, *plen, mr);
> @@ -3114,31 +3105,36 @@ void *address_space_map(AddressSpace *as,
>  void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
>                           bool is_write, hwaddr access_len)
>  {
> -    if (buffer != bounce.buffer) {
> -        MemoryRegion *mr;
> -        ram_addr_t addr1;
> +    MemoryRegion *mr;
> +    ram_addr_t addr1;
> +
> +    mr = memory_region_from_host(buffer, &addr1);
> +    if (mr == NULL) {
> +        /*
> +         * Must be a bounce buffer (unless the caller passed a pointer which
> +         * wasn't returned by address_space_map, which is illegal).
> +         */
> +        BounceBuffer *bounce = container_of(buffer, BounceBuffer, buffer);
>
> -        mr = memory_region_from_host(buffer, &addr1);
> -        assert(mr != NULL);
>          if (is_write) {
> -            invalidate_and_set_dirty(mr, addr1, access_len);
> +            address_space_write(as, bounce->addr, MEMTXATTRS_UNSPECIFIED,
> +                                bounce->buffer, access_len);
>          }
> -        if (xen_enabled()) {
> -            xen_invalidate_map_cache_entry(buffer);
> +        memory_region_unref(bounce->mr);
> +        g_free(bounce);
> +
> +        if (qatomic_dec_fetch(&bounce_buffers_in_use) == 1) {
> +            cpu_notify_map_clients();
>          }
> -        memory_region_unref(mr);
>          return;
>      }
> +
> +    if (xen_enabled()) {
> +        xen_invalidate_map_cache_entry(buffer);
> +    }
>      if (is_write) {
> -        address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
> -                            bounce.buffer, access_len);
> -    }
> -    qemu_vfree(bounce.buffer);
> -    bounce.buffer = NULL;
> -    memory_region_unref(bounce.mr);
> -    /* Clear in_use before reading map_client_list. */
> -    qatomic_set_mb(&bounce.in_use, false);
> -    cpu_notify_map_clients();
> +        invalidate_and_set_dirty(mr, addr1, access_len);
> +    }
>  }
>
>  void *cpu_physical_memory_map(hwaddr addr,
> --
> 2.34.1
>
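To make the pre-patch behavior quoted above concrete, here is a hypothetical caller-side sketch (not from the patch or the thread): a device model mapping two DMA fragments of one packet. The function name, addresses, and error handling are made up; the address_space_map()/address_space_unmap() signatures and MEMTXATTRS_UNSPECIFIED are the ones visible in the diff, assuming the usual "qemu/osdep.h" and "exec/memory.h" declarations. With the old single static bounce buffer, the second indirect mapping finds bounce.in_use already set and gets NULL back.

#include "qemu/osdep.h"
#include "exec/memory.h"

/* Hypothetical helper, for illustration only. */
static bool map_two_fragments(AddressSpace *as, hwaddr a0, hwaddr a1, hwaddr len)
{
    hwaddr l0 = len, l1 = len;

    /* First indirect mapping claims the (pre-patch) single bounce buffer. */
    void *p0 = address_space_map(as, a0, &l0, true, MEMTXATTRS_UNSPECIFIED);
    if (!p0) {
        return false;
    }

    /* Pre-patch: this returns NULL (*plen == 0) while p0 is still mapped. */
    void *p1 = address_space_map(as, a1, &l1, true, MEMTXATTRS_UNSPECIFIED);
    if (!p1) {
        /* The device model has to bail; the guest sees a timeout or I/O error. */
        address_space_unmap(as, p0, l0, false, 0);
        return false;
    }

    /* ... fill p0 and p1 with device data ... */

    address_space_unmap(as, p1, l1, true, l1);
    address_space_unmap(as, p0, l0, true, l0);
    return true;
}

With the dynamically allocated bounce buffers from this patch, both mappings succeed and the question becomes whether the total allocation is bounded, which is the point raised above.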
On Tue, Jul 04, 2023 at 01:06:25AM -0700, Mattias Nissler wrote:
> +        if (qatomic_dec_fetch(&bounce_buffers_in_use) == 1) {
> +            cpu_notify_map_clients();
>          }

About my comment regarding removing this API: I see the next patch does
that.

Stefan
On Thu, Jul 20, 2023 at 8:10 PM Stefan Hajnoczi <stefanha@redhat.com> wrote:
>
> On Tue, Jul 04, 2023 at 01:06:25AM -0700, Mattias Nissler wrote:
> > It is not uncommon for device models to request mapping of several DMA
> > regions at the same time. An example is igb (and probably other net
> > devices as well) when a packet is spread across multiple descriptors.
> >
> > In order to support this when indirect DMA is used, as is the case when
> > running the device model in a vfio-server process without mmap()-ed DMA,
> > this change allocates DMA bounce buffers dynamically instead of
> > supporting only a single buffer.
> >
> > Signed-off-by: Mattias Nissler <mnissler@rivosinc.com>
> > ---
> >  softmmu/physmem.c | 74 ++++++++++++++++++++++-------------------------
> >  1 file changed, 35 insertions(+), 39 deletions(-)
>
> Is this a functional change or purely a performance optimization? If
> it's a performance optimization, please include benchmark results to
> justify this change.

It's a functional change in the sense that it fixes qemu to make some
hardware models actually work with bounce-buffered DMA. Right now, the
device models attempt to perform DMA accesses, receive an error due to
bounce buffer contention and then just bail, which the guest will
observe as a timeout and/or hardware error. I ran into this with igb
and xhci.

>
>
> QEMU memory allocations must be bounded so that an untrusted guest
> cannot cause QEMU to exhaust host memory. There must be a limit to the
> amount of bounce buffer memory.

Ah, makes sense. I will add code to track the total bounce buffer size
and enforce a limit. Since the amount of buffer space depends a lot on
the workload (I have observed xhci + usb-storage + Linux to use 1MB
buffer sizes by default), I'll make the limit configurable.

>
>
> > diff --git a/softmmu/physmem.c b/softmmu/physmem.c
> > index bda475a719..56130b5a1d 100644
> > --- a/softmmu/physmem.c
> > +++ b/softmmu/physmem.c
> > @@ -2904,13 +2904,11 @@ void cpu_flush_icache_range(hwaddr start, hwaddr len)
> >
> >  typedef struct {
> >      MemoryRegion *mr;
> > -    void *buffer;
> >      hwaddr addr;
> > -    hwaddr len;
> > -    bool in_use;
> > +    uint8_t buffer[];
> >  } BounceBuffer;
> >
> > -static BounceBuffer bounce;
> > +static size_t bounce_buffers_in_use;
> >
> >  typedef struct MapClient {
> >      QEMUBH *bh;
> > @@ -2947,7 +2945,7 @@ void cpu_register_map_client(QEMUBH *bh)
> >      QLIST_INSERT_HEAD(&map_client_list, client, link);
> >      /* Write map_client_list before reading in_use. */
> >      smp_mb();
> > -    if (!qatomic_read(&bounce.in_use)) {
> > +    if (qatomic_read(&bounce_buffers_in_use)) {
> >          cpu_notify_map_clients_locked();
> >      }
> >      qemu_mutex_unlock(&map_client_list_lock);
> > @@ -3076,31 +3074,24 @@ void *address_space_map(AddressSpace *as,
> >      RCU_READ_LOCK_GUARD();
> >      fv = address_space_to_flatview(as);
> >      mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs);
> > +    memory_region_ref(mr);
> >
> >      if (!memory_access_is_direct(mr, is_write)) {
> > -        if (qatomic_xchg(&bounce.in_use, true)) {
> > -            *plen = 0;
> > -            return NULL;
> > -        }
> > -        /* Avoid unbounded allocations */
> > -        l = MIN(l, TARGET_PAGE_SIZE);
> > -        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
> > -        bounce.addr = addr;
> > -        bounce.len = l;
> > -
> > -        memory_region_ref(mr);
> > -        bounce.mr = mr;
> > +        qatomic_inc_fetch(&bounce_buffers_in_use);
> > +
> > +        BounceBuffer *bounce = g_malloc(l + sizeof(BounceBuffer));
> > +        bounce->addr = addr;
> > +        bounce->mr = mr;
> > +
> >          if (!is_write) {
> >              flatview_read(fv, addr, MEMTXATTRS_UNSPECIFIED,
> > -                          bounce.buffer, l);
> > +                          bounce->buffer, l);
> >          }
> >
> >          *plen = l;
> > -        return bounce.buffer;
> > +        return bounce->buffer;
>
> Bounce buffer allocation always succeeds now. Can the
> cpu_notify_map_clients*() be removed now that no one is waiting for
> bounce buffers anymore?
>
> >      }
> >
> > -
> > -    memory_region_ref(mr);
> >      *plen = flatview_extend_translation(fv, addr, len, mr, xlat,
> >                                          l, is_write, attrs);
> >      fuzz_dma_read_cb(addr, *plen, mr);
> > @@ -3114,31 +3105,36 @@ void *address_space_map(AddressSpace *as,
> >  void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
> >                           bool is_write, hwaddr access_len)
> >  {
> > -    if (buffer != bounce.buffer) {
> > -        MemoryRegion *mr;
> > -        ram_addr_t addr1;
> > +    MemoryRegion *mr;
> > +    ram_addr_t addr1;
> > +
> > +    mr = memory_region_from_host(buffer, &addr1);
> > +    if (mr == NULL) {
> > +        /*
> > +         * Must be a bounce buffer (unless the caller passed a pointer which
> > +         * wasn't returned by address_space_map, which is illegal).
> > +         */
> > +        BounceBuffer *bounce = container_of(buffer, BounceBuffer, buffer);
> >
> > -        mr = memory_region_from_host(buffer, &addr1);
> > -        assert(mr != NULL);
> >          if (is_write) {
> > -            invalidate_and_set_dirty(mr, addr1, access_len);
> > +            address_space_write(as, bounce->addr, MEMTXATTRS_UNSPECIFIED,
> > +                                bounce->buffer, access_len);
> >          }
> > -        if (xen_enabled()) {
> > -            xen_invalidate_map_cache_entry(buffer);
> > +        memory_region_unref(bounce->mr);
> > +        g_free(bounce);
> > +
> > +        if (qatomic_dec_fetch(&bounce_buffers_in_use) == 1) {
> > +            cpu_notify_map_clients();
> >          }
> > -        memory_region_unref(mr);
> >          return;
> >      }
> > +
> > +    if (xen_enabled()) {
> > +        xen_invalidate_map_cache_entry(buffer);
> > +    }
> >      if (is_write) {
> > -        address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
> > -                            bounce.buffer, access_len);
> > -    }
> > -    qemu_vfree(bounce.buffer);
> > -    bounce.buffer = NULL;
> > -    memory_region_unref(bounce.mr);
> > -    /* Clear in_use before reading map_client_list. */
> > -    qatomic_set_mb(&bounce.in_use, false);
> > -    cpu_notify_map_clients();
> > +        invalidate_and_set_dirty(mr, addr1, access_len);
> > +    }
> >  }
> >
> >  void *cpu_physical_memory_map(hwaddr addr,
> > --
> > 2.34.1
> >
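A rough sketch of the bounded-allocation idea described in the reply above, assuming the usual qatomic helpers and the types from softmmu/physmem.c; the names bounce_buffer_total, max_bounce_buffer_size, and bounce_buffer_try_alloc are placeholders, not necessarily what a later revision of the series uses, and the real limit would likely be a configurable property rather than a hard-coded default.

/* Hypothetical sketch for softmmu/physmem.c; not the posted patch. */
static size_t bounce_buffer_total;               /* bytes currently allocated */
static size_t max_bounce_buffer_size = 4096;     /* would be made configurable */

static BounceBuffer *bounce_buffer_try_alloc(MemoryRegion *mr, hwaddr addr,
                                             hwaddr len)
{
    /* Reserve the bytes first; back out if that would exceed the limit. */
    size_t total = qatomic_fetch_add(&bounce_buffer_total, len) + len;
    if (total > max_bounce_buffer_size) {
        qatomic_fetch_sub(&bounce_buffer_total, len);
        return NULL;   /* address_space_map() would then fail with *plen = 0 */
    }

    BounceBuffer *bounce = g_malloc(len + sizeof(BounceBuffer));
    bounce->addr = addr;
    bounce->mr = mr;
    return bounce;
}

The unmap side would subtract the same length again (address_space_unmap() already receives it as a parameter), and a failed allocation would keep the cpu_register_map_client() retry path relevant, which ties back to Stefan's question about removing that API.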
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index bda475a719..56130b5a1d 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -2904,13 +2904,11 @@ void cpu_flush_icache_range(hwaddr start, hwaddr len)
 
 typedef struct {
     MemoryRegion *mr;
-    void *buffer;
     hwaddr addr;
-    hwaddr len;
-    bool in_use;
+    uint8_t buffer[];
 } BounceBuffer;
 
-static BounceBuffer bounce;
+static size_t bounce_buffers_in_use;
 
 typedef struct MapClient {
     QEMUBH *bh;
@@ -2947,7 +2945,7 @@ void cpu_register_map_client(QEMUBH *bh)
     QLIST_INSERT_HEAD(&map_client_list, client, link);
     /* Write map_client_list before reading in_use. */
     smp_mb();
-    if (!qatomic_read(&bounce.in_use)) {
+    if (qatomic_read(&bounce_buffers_in_use)) {
         cpu_notify_map_clients_locked();
     }
     qemu_mutex_unlock(&map_client_list_lock);
@@ -3076,31 +3074,24 @@ void *address_space_map(AddressSpace *as,
     RCU_READ_LOCK_GUARD();
     fv = address_space_to_flatview(as);
     mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs);
+    memory_region_ref(mr);
 
     if (!memory_access_is_direct(mr, is_write)) {
-        if (qatomic_xchg(&bounce.in_use, true)) {
-            *plen = 0;
-            return NULL;
-        }
-        /* Avoid unbounded allocations */
-        l = MIN(l, TARGET_PAGE_SIZE);
-        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
-        bounce.addr = addr;
-        bounce.len = l;
-
-        memory_region_ref(mr);
-        bounce.mr = mr;
+        qatomic_inc_fetch(&bounce_buffers_in_use);
+
+        BounceBuffer *bounce = g_malloc(l + sizeof(BounceBuffer));
+        bounce->addr = addr;
+        bounce->mr = mr;
+
         if (!is_write) {
             flatview_read(fv, addr, MEMTXATTRS_UNSPECIFIED,
-                          bounce.buffer, l);
+                          bounce->buffer, l);
         }
 
         *plen = l;
-        return bounce.buffer;
+        return bounce->buffer;
     }
 
-
-    memory_region_ref(mr);
     *plen = flatview_extend_translation(fv, addr, len, mr, xlat,
                                         l, is_write, attrs);
     fuzz_dma_read_cb(addr, *plen, mr);
@@ -3114,31 +3105,36 @@ void *address_space_map(AddressSpace *as,
 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                          bool is_write, hwaddr access_len)
 {
-    if (buffer != bounce.buffer) {
-        MemoryRegion *mr;
-        ram_addr_t addr1;
+    MemoryRegion *mr;
+    ram_addr_t addr1;
+
+    mr = memory_region_from_host(buffer, &addr1);
+    if (mr == NULL) {
+        /*
+         * Must be a bounce buffer (unless the caller passed a pointer which
+         * wasn't returned by address_space_map, which is illegal).
+         */
+        BounceBuffer *bounce = container_of(buffer, BounceBuffer, buffer);
 
-        mr = memory_region_from_host(buffer, &addr1);
-        assert(mr != NULL);
         if (is_write) {
-            invalidate_and_set_dirty(mr, addr1, access_len);
+            address_space_write(as, bounce->addr, MEMTXATTRS_UNSPECIFIED,
+                                bounce->buffer, access_len);
         }
-        if (xen_enabled()) {
-            xen_invalidate_map_cache_entry(buffer);
+        memory_region_unref(bounce->mr);
+        g_free(bounce);
+
+        if (qatomic_dec_fetch(&bounce_buffers_in_use) == 1) {
+            cpu_notify_map_clients();
         }
-        memory_region_unref(mr);
         return;
     }
+
+    if (xen_enabled()) {
+        xen_invalidate_map_cache_entry(buffer);
+    }
     if (is_write) {
-        address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
-                            bounce.buffer, access_len);
-    }
-    qemu_vfree(bounce.buffer);
-    bounce.buffer = NULL;
-    memory_region_unref(bounce.mr);
-    /* Clear in_use before reading map_client_list. */
-    qatomic_set_mb(&bounce.in_use, false);
-    cpu_notify_map_clients();
+        invalidate_and_set_dirty(mr, addr1, access_len);
+    }
 }
 
 void *cpu_physical_memory_map(hwaddr addr,
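One detail of the new address_space_unmap() path that is easy to miss: the payload returned to callers is a flexible array member at the end of BounceBuffer, so the metadata sits in the same g_malloc() allocation immediately before it and can be recovered with container_of() without any lookup table. The standalone program below (my own illustration, not part of the patch) demonstrates that round trip; it uses plain void*/uint64_t stand-ins for MemoryRegion*/hwaddr and defines a simple container_of, whereas QEMU provides its own.

#include <glib.h>
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Mirrors the patched struct layout: metadata followed by payload bytes. */
typedef struct {
    void *mr;            /* MemoryRegion * in QEMU; opaque here */
    uint64_t addr;       /* hwaddr in QEMU */
    uint8_t buffer[];    /* what address_space_map() hands to the caller */
} BounceBuffer;

#define container_of(ptr, type, member) \
    ((type *) ((char *) (ptr) - offsetof(type, member)))

int main(void)
{
    size_t len = 4096;
    BounceBuffer *bb = g_malloc(len + sizeof(BounceBuffer));

    void *mapped = bb->buffer;   /* pointer handed out to the device model */
    BounceBuffer *back = container_of(mapped, BounceBuffer, buffer);

    assert(back == bb);          /* unmap recovers the metadata from the pointer */
    g_free(back);
    return 0;
}

This is also why memory_region_from_host() returning NULL is a sufficient test in the patch: a pointer that does not belong to guest RAM must have come from a bounce buffer (or the caller broke the API contract, as the code comment notes).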
It is not uncommon for device models to request mapping of several DMA
regions at the same time. An example is igb (and probably other net
devices as well) when a packet is spread across multiple descriptors.

In order to support this when indirect DMA is used, as is the case when
running the device model in a vfio-server process without mmap()-ed DMA,
this change allocates DMA bounce buffers dynamically instead of
supporting only a single buffer.

Signed-off-by: Mattias Nissler <mnissler@rivosinc.com>
---
 softmmu/physmem.c | 74 ++++++++++++++++++++++-------------------------
 1 file changed, 35 insertions(+), 39 deletions(-)