Message ID | 711d01b9e52be0260f26a72feb286f6b32cbe332.1645079934.git.jag.raman@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | vfio-user server in QEMU | expand |
On Thu, Feb 17, 2022 at 02:49:01AM -0500, Jagannathan Raman wrote: > Determine the BARs used by the PCI device and register handlers to > manage the access to the same. Hi Paolo, Please review this from the memory API perspective. vfu_object_bar_rw() reimplements MemoryRegion read/write because we're dispatching to a MemoryRegion without going through an AddressSpace/FlatView. > > Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> > Signed-off-by: John G Johnson <john.g.johnson@oracle.com> > Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> > --- > include/exec/memory.h | 3 + > hw/remote/vfio-user-obj.c | 166 ++++++++++++++++++++++++++++++++ > softmmu/physmem.c | 4 +- > tests/qtest/fuzz/generic_fuzz.c | 9 +- > hw/remote/trace-events | 3 + > 5 files changed, 179 insertions(+), 6 deletions(-) > > diff --git a/include/exec/memory.h b/include/exec/memory.h > index 4d5997e6bb..4b061e62d5 100644 > --- a/include/exec/memory.h > +++ b/include/exec/memory.h > @@ -2810,6 +2810,9 @@ MemTxResult address_space_write_cached_slow(MemoryRegionCache *cache, > hwaddr addr, const void *buf, > hwaddr len); > > +int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr); > +bool prepare_mmio_access(MemoryRegion *mr); > + > static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write) > { > if (is_write) { > diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c > index 971f6ca28e..2feabd06a4 100644 > --- a/hw/remote/vfio-user-obj.c > +++ b/hw/remote/vfio-user-obj.c > @@ -53,6 +53,7 @@ > #include "hw/qdev-core.h" > #include "hw/pci/pci.h" > #include "qemu/timer.h" > +#include "exec/memory.h" > > #define TYPE_VFU_OBJECT "x-vfio-user-server" > OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT) > @@ -299,6 +300,169 @@ static void dma_unregister(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info) > trace_vfu_dma_unregister((uint64_t)info->iova.iov_base); > } > > +static size_t vfu_object_bar_rw(PCIDevice *pci_dev, int pci_bar, > + hwaddr offset, char * const buf, > + hwaddr len, const bool is_write) > +{ > + uint8_t *ptr = (uint8_t *)buf; > + uint8_t *ram_ptr = NULL; > + bool release_lock = false; > + MemoryRegionSection section = { 0 }; > + MemoryRegion *mr = NULL; > + int access_size; > + hwaddr size = 0; > + MemTxResult result; > + uint64_t val; > + > + section = memory_region_find(pci_dev->io_regions[pci_bar].memory, > + offset, len); > + > + if (!section.mr) { > + return 0; > + } > + > + mr = section.mr; > + > + if (is_write && mr->readonly) { > + warn_report("vfu: attempting to write to readonly region in " > + "bar %d - [0x%"PRIx64" - 0x%"PRIx64"]", > + pci_bar, offset, (offset + len)); > + return 0; > + } > + > + if (memory_access_is_direct(mr, is_write)) { > + /** > + * Some devices expose a PCI expansion ROM, which could be buffer > + * based as compared to other regions which are primarily based on > + * MemoryRegionOps. memory_region_find() would already check > + * for buffer overflow, we don't need to repeat it here. > + */ > + ram_ptr = memory_region_get_ram_ptr(mr); > + > + size = len; > + > + if (is_write) { > + memcpy(ram_ptr, buf, size); > + } else { > + memcpy(buf, ram_ptr, size); > + } > + > + goto exit; > + } > + > + while (len > 0) { > + /** > + * The read/write logic used below is similar to the ones in > + * flatview_read/write_continue() > + */ > + release_lock = prepare_mmio_access(mr); > + > + access_size = memory_access_size(mr, len, offset); > + > + if (is_write) { > + val = ldn_he_p(ptr, access_size); > + > + result = memory_region_dispatch_write(mr, offset, val, > + size_memop(access_size), > + MEMTXATTRS_UNSPECIFIED); > + } else { > + result = memory_region_dispatch_read(mr, offset, &val, > + size_memop(access_size), > + MEMTXATTRS_UNSPECIFIED); > + > + stn_he_p(ptr, access_size, val); > + } > + > + if (release_lock) { > + qemu_mutex_unlock_iothread(); > + release_lock = false; > + } > + > + if (result != MEMTX_OK) { > + warn_report("vfu: failed to %s 0x%"PRIx64"", > + is_write ? "write to" : "read from", > + (offset - size)); > + > + goto exit; > + } > + > + len -= access_size; > + size += access_size; > + ptr += access_size; > + offset += access_size; > + } > + > +exit: > + memory_region_unref(mr); > + > + return size; > +} > + > +/** > + * VFU_OBJECT_BAR_HANDLER - macro for defining handlers for PCI BARs. > + * > + * To create handler for BAR number 2, VFU_OBJECT_BAR_HANDLER(2) would > + * define vfu_object_bar2_handler > + */ > +#define VFU_OBJECT_BAR_HANDLER(BAR_NO) \ > + static ssize_t vfu_object_bar##BAR_NO##_handler(vfu_ctx_t *vfu_ctx, \ > + char * const buf, size_t count, \ > + loff_t offset, const bool is_write) \ > + { \ > + VfuObject *o = vfu_get_private(vfu_ctx); \ > + PCIDevice *pci_dev = o->pci_dev; \ > + \ > + return vfu_object_bar_rw(pci_dev, BAR_NO, offset, \ > + buf, count, is_write); \ > + } \ > + > +VFU_OBJECT_BAR_HANDLER(0) > +VFU_OBJECT_BAR_HANDLER(1) > +VFU_OBJECT_BAR_HANDLER(2) > +VFU_OBJECT_BAR_HANDLER(3) > +VFU_OBJECT_BAR_HANDLER(4) > +VFU_OBJECT_BAR_HANDLER(5) > +VFU_OBJECT_BAR_HANDLER(6) > + > +static vfu_region_access_cb_t *vfu_object_bar_handlers[PCI_NUM_REGIONS] = { > + &vfu_object_bar0_handler, > + &vfu_object_bar1_handler, > + &vfu_object_bar2_handler, > + &vfu_object_bar3_handler, > + &vfu_object_bar4_handler, > + &vfu_object_bar5_handler, > + &vfu_object_bar6_handler, > +}; > + > +/** > + * vfu_object_register_bars - Identify active BAR regions of pdev and setup > + * callbacks to handle read/write accesses > + */ > +static void vfu_object_register_bars(vfu_ctx_t *vfu_ctx, PCIDevice *pdev) > +{ > + int flags = VFU_REGION_FLAG_RW; > + int i; > + > + for (i = 0; i < PCI_NUM_REGIONS; i++) { > + if (!pdev->io_regions[i].size) { > + continue; > + } > + > + if ((i == VFU_PCI_DEV_ROM_REGION_IDX) || > + pdev->io_regions[i].memory->readonly) { > + flags &= ~VFU_REGION_FLAG_WRITE; > + } > + > + vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX + i, > + (size_t)pdev->io_regions[i].size, > + vfu_object_bar_handlers[i], > + flags, NULL, 0, -1, 0); > + > + trace_vfu_bar_register(i, pdev->io_regions[i].addr, > + pdev->io_regions[i].size); > + } > +} > + > /* > * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device' > * properties. It also depends on devices instantiated in QEMU. These > @@ -393,6 +557,8 @@ static void vfu_object_init_ctx(VfuObject *o, Error **errp) > goto fail; > } > > + vfu_object_register_bars(o->vfu_ctx, o->pci_dev); > + > ret = vfu_realize_ctx(o->vfu_ctx); > if (ret < 0) { > error_setg(errp, "vfu: Failed to realize device %s- %s", > diff --git a/softmmu/physmem.c b/softmmu/physmem.c > index dddf70edf5..3188d4e143 100644 > --- a/softmmu/physmem.c > +++ b/softmmu/physmem.c > @@ -2717,7 +2717,7 @@ void memory_region_flush_rom_device(MemoryRegion *mr, hwaddr addr, hwaddr size) > invalidate_and_set_dirty(mr, addr, size); > } > > -static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr) > +int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr) > { > unsigned access_size_max = mr->ops->valid.max_access_size; > > @@ -2744,7 +2744,7 @@ static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr) > return l; > } > > -static bool prepare_mmio_access(MemoryRegion *mr) > +bool prepare_mmio_access(MemoryRegion *mr) > { > bool release_lock = false; > > diff --git a/tests/qtest/fuzz/generic_fuzz.c b/tests/qtest/fuzz/generic_fuzz.c > index dd7e25851c..77547fc1d8 100644 > --- a/tests/qtest/fuzz/generic_fuzz.c > +++ b/tests/qtest/fuzz/generic_fuzz.c > @@ -144,7 +144,7 @@ static void *pattern_alloc(pattern p, size_t len) > return buf; > } > > -static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr) > +static int fuzz_memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr) > { > unsigned access_size_max = mr->ops->valid.max_access_size; > > @@ -242,11 +242,12 @@ void fuzz_dma_read_cb(size_t addr, size_t len, MemoryRegion *mr) > > /* > * If mr1 isn't RAM, address_space_translate doesn't update l. Use > - * memory_access_size to identify the number of bytes that it is safe > - * to write without accidentally writing to another MemoryRegion. > + * fuzz_memory_access_size to identify the number of bytes that it > + * is safe to write without accidentally writing to another > + * MemoryRegion. > */ > if (!memory_region_is_ram(mr1)) { > - l = memory_access_size(mr1, l, addr1); > + l = fuzz_memory_access_size(mr1, l, addr1); > } > if (memory_region_is_ram(mr1) || > memory_region_is_romd(mr1) || > diff --git a/hw/remote/trace-events b/hw/remote/trace-events > index f945c7e33b..847d50d88f 100644 > --- a/hw/remote/trace-events > +++ b/hw/remote/trace-events > @@ -9,3 +9,6 @@ vfu_cfg_read(uint32_t offset, uint32_t val) "vfu: cfg: 0x%u -> 0x%x" > vfu_cfg_write(uint32_t offset, uint32_t val) "vfu: cfg: 0x%u <- 0x%x" > vfu_dma_register(uint64_t gpa, size_t len) "vfu: registering GPA 0x%"PRIx64", %zu bytes" > vfu_dma_unregister(uint64_t gpa) "vfu: unregistering GPA 0x%"PRIx64"" > +vfu_bar_register(int i, uint64_t addr, uint64_t size) "vfu: BAR %d: addr 0x%"PRIx64" size 0x%"PRIx64"" > +vfu_bar_rw_enter(const char *op, uint64_t addr) "vfu: %s request for BAR address 0x%"PRIx64"" > +vfu_bar_rw_exit(const char *op, uint64_t addr) "vfu: Finished %s of BAR address 0x%"PRIx64"" > -- > 2.20.1 >
diff --git a/include/exec/memory.h b/include/exec/memory.h index 4d5997e6bb..4b061e62d5 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -2810,6 +2810,9 @@ MemTxResult address_space_write_cached_slow(MemoryRegionCache *cache, hwaddr addr, const void *buf, hwaddr len); +int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr); +bool prepare_mmio_access(MemoryRegion *mr); + static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write) { if (is_write) { diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c index 971f6ca28e..2feabd06a4 100644 --- a/hw/remote/vfio-user-obj.c +++ b/hw/remote/vfio-user-obj.c @@ -53,6 +53,7 @@ #include "hw/qdev-core.h" #include "hw/pci/pci.h" #include "qemu/timer.h" +#include "exec/memory.h" #define TYPE_VFU_OBJECT "x-vfio-user-server" OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT) @@ -299,6 +300,169 @@ static void dma_unregister(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info) trace_vfu_dma_unregister((uint64_t)info->iova.iov_base); } +static size_t vfu_object_bar_rw(PCIDevice *pci_dev, int pci_bar, + hwaddr offset, char * const buf, + hwaddr len, const bool is_write) +{ + uint8_t *ptr = (uint8_t *)buf; + uint8_t *ram_ptr = NULL; + bool release_lock = false; + MemoryRegionSection section = { 0 }; + MemoryRegion *mr = NULL; + int access_size; + hwaddr size = 0; + MemTxResult result; + uint64_t val; + + section = memory_region_find(pci_dev->io_regions[pci_bar].memory, + offset, len); + + if (!section.mr) { + return 0; + } + + mr = section.mr; + + if (is_write && mr->readonly) { + warn_report("vfu: attempting to write to readonly region in " + "bar %d - [0x%"PRIx64" - 0x%"PRIx64"]", + pci_bar, offset, (offset + len)); + return 0; + } + + if (memory_access_is_direct(mr, is_write)) { + /** + * Some devices expose a PCI expansion ROM, which could be buffer + * based as compared to other regions which are primarily based on + * MemoryRegionOps. memory_region_find() would already check + * for buffer overflow, we don't need to repeat it here. + */ + ram_ptr = memory_region_get_ram_ptr(mr); + + size = len; + + if (is_write) { + memcpy(ram_ptr, buf, size); + } else { + memcpy(buf, ram_ptr, size); + } + + goto exit; + } + + while (len > 0) { + /** + * The read/write logic used below is similar to the ones in + * flatview_read/write_continue() + */ + release_lock = prepare_mmio_access(mr); + + access_size = memory_access_size(mr, len, offset); + + if (is_write) { + val = ldn_he_p(ptr, access_size); + + result = memory_region_dispatch_write(mr, offset, val, + size_memop(access_size), + MEMTXATTRS_UNSPECIFIED); + } else { + result = memory_region_dispatch_read(mr, offset, &val, + size_memop(access_size), + MEMTXATTRS_UNSPECIFIED); + + stn_he_p(ptr, access_size, val); + } + + if (release_lock) { + qemu_mutex_unlock_iothread(); + release_lock = false; + } + + if (result != MEMTX_OK) { + warn_report("vfu: failed to %s 0x%"PRIx64"", + is_write ? "write to" : "read from", + (offset - size)); + + goto exit; + } + + len -= access_size; + size += access_size; + ptr += access_size; + offset += access_size; + } + +exit: + memory_region_unref(mr); + + return size; +} + +/** + * VFU_OBJECT_BAR_HANDLER - macro for defining handlers for PCI BARs. + * + * To create handler for BAR number 2, VFU_OBJECT_BAR_HANDLER(2) would + * define vfu_object_bar2_handler + */ +#define VFU_OBJECT_BAR_HANDLER(BAR_NO) \ + static ssize_t vfu_object_bar##BAR_NO##_handler(vfu_ctx_t *vfu_ctx, \ + char * const buf, size_t count, \ + loff_t offset, const bool is_write) \ + { \ + VfuObject *o = vfu_get_private(vfu_ctx); \ + PCIDevice *pci_dev = o->pci_dev; \ + \ + return vfu_object_bar_rw(pci_dev, BAR_NO, offset, \ + buf, count, is_write); \ + } \ + +VFU_OBJECT_BAR_HANDLER(0) +VFU_OBJECT_BAR_HANDLER(1) +VFU_OBJECT_BAR_HANDLER(2) +VFU_OBJECT_BAR_HANDLER(3) +VFU_OBJECT_BAR_HANDLER(4) +VFU_OBJECT_BAR_HANDLER(5) +VFU_OBJECT_BAR_HANDLER(6) + +static vfu_region_access_cb_t *vfu_object_bar_handlers[PCI_NUM_REGIONS] = { + &vfu_object_bar0_handler, + &vfu_object_bar1_handler, + &vfu_object_bar2_handler, + &vfu_object_bar3_handler, + &vfu_object_bar4_handler, + &vfu_object_bar5_handler, + &vfu_object_bar6_handler, +}; + +/** + * vfu_object_register_bars - Identify active BAR regions of pdev and setup + * callbacks to handle read/write accesses + */ +static void vfu_object_register_bars(vfu_ctx_t *vfu_ctx, PCIDevice *pdev) +{ + int flags = VFU_REGION_FLAG_RW; + int i; + + for (i = 0; i < PCI_NUM_REGIONS; i++) { + if (!pdev->io_regions[i].size) { + continue; + } + + if ((i == VFU_PCI_DEV_ROM_REGION_IDX) || + pdev->io_regions[i].memory->readonly) { + flags &= ~VFU_REGION_FLAG_WRITE; + } + + vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX + i, + (size_t)pdev->io_regions[i].size, + vfu_object_bar_handlers[i], + flags, NULL, 0, -1, 0); + + trace_vfu_bar_register(i, pdev->io_regions[i].addr, + pdev->io_regions[i].size); + } +} + /* * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device' * properties. It also depends on devices instantiated in QEMU. These @@ -393,6 +557,8 @@ static void vfu_object_init_ctx(VfuObject *o, Error **errp) goto fail; } + vfu_object_register_bars(o->vfu_ctx, o->pci_dev); + ret = vfu_realize_ctx(o->vfu_ctx); if (ret < 0) { error_setg(errp, "vfu: Failed to realize device %s- %s", diff --git a/softmmu/physmem.c b/softmmu/physmem.c index dddf70edf5..3188d4e143 100644 --- a/softmmu/physmem.c +++ b/softmmu/physmem.c @@ -2717,7 +2717,7 @@ void memory_region_flush_rom_device(MemoryRegion *mr, hwaddr addr, hwaddr size) invalidate_and_set_dirty(mr, addr, size); } -static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr) +int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr) { unsigned access_size_max = mr->ops->valid.max_access_size; @@ -2744,7 +2744,7 @@ static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr) return l; } -static bool prepare_mmio_access(MemoryRegion *mr) +bool prepare_mmio_access(MemoryRegion *mr) { bool release_lock = false; diff --git a/tests/qtest/fuzz/generic_fuzz.c b/tests/qtest/fuzz/generic_fuzz.c index dd7e25851c..77547fc1d8 100644 --- a/tests/qtest/fuzz/generic_fuzz.c +++ b/tests/qtest/fuzz/generic_fuzz.c @@ -144,7 +144,7 @@ static void *pattern_alloc(pattern p, size_t len) return buf; } -static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr) +static int fuzz_memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr) { unsigned access_size_max = mr->ops->valid.max_access_size; @@ -242,11 +242,12 @@ void fuzz_dma_read_cb(size_t addr, size_t len, MemoryRegion *mr) /* * If mr1 isn't RAM, address_space_translate doesn't update l. Use - * memory_access_size to identify the number of bytes that it is safe - * to write without accidentally writing to another MemoryRegion. + * fuzz_memory_access_size to identify the number of bytes that it + * is safe to write without accidentally writing to another + * MemoryRegion. */ if (!memory_region_is_ram(mr1)) { - l = memory_access_size(mr1, l, addr1); + l = fuzz_memory_access_size(mr1, l, addr1); } if (memory_region_is_ram(mr1) || memory_region_is_romd(mr1) || diff --git a/hw/remote/trace-events b/hw/remote/trace-events index f945c7e33b..847d50d88f 100644 --- a/hw/remote/trace-events +++ b/hw/remote/trace-events @@ -9,3 +9,6 @@ vfu_cfg_read(uint32_t offset, uint32_t val) "vfu: cfg: 0x%u -> 0x%x" vfu_cfg_write(uint32_t offset, uint32_t val) "vfu: cfg: 0x%u <- 0x%x" vfu_dma_register(uint64_t gpa, size_t len) "vfu: registering GPA 0x%"PRIx64", %zu bytes" vfu_dma_unregister(uint64_t gpa) "vfu: unregistering GPA 0x%"PRIx64"" +vfu_bar_register(int i, uint64_t addr, uint64_t size) "vfu: BAR %d: addr 0x%"PRIx64" size 0x%"PRIx64"" +vfu_bar_rw_enter(const char *op, uint64_t addr) "vfu: %s request for BAR address 0x%"PRIx64"" +vfu_bar_rw_exit(const char *op, uint64_t addr) "vfu: Finished %s of BAR address 0x%"PRIx64""