diff mbox series

[RFC,v3,4/5] virtio: add in_xlat_addr & out_xlat_addr VirtQueueElement members

Message ID 20250110170837.2747532-5-jonah.palmer@oracle.com (mailing list archive)
State New
Headers show
Series Handling aliased guest memory maps in vhost-vDPA SVQs | expand

Commit Message

Jonah Palmer Jan. 10, 2025, 5:08 p.m. UTC
Adds the in_xlat_addr & out_xlat_addr hwaddr arrays to the
VirtQueueElement struct and introduces an optional GPA output parameter
to dma_memory_map().

These arrays will store a VirtQueueElement's input/output descriptors'
GPA of the mapped memory region, if it's backed by guest memory, via
dma_memory_map().

The GPA will always correspond 1:1 to the iovec entry when translating
addresses between Qemu VAs and SVQ IOVAs in vhost_svq_translate_addr().
This helps to avoid extra complicated code in SVQ's
vhost_svq_vring_write_descs() function (e.g. splitting up iovec into
multiple buffers, not breaking devices using aliased mapping, etc.).

Since the translation is only done once inside the DMA API alongside
virtqueue_pop(), the cost should be minimal.

Signed-off-by: Jonah Palmer <jonah.palmer@oracle.com>
---
 hw/display/virtio-gpu.c     |  5 ++--
 hw/hyperv/vmbus.c           |  8 +++---
 hw/ide/ahci.c               |  7 +++---
 hw/usb/libhw.c              |  2 +-
 hw/virtio/virtio.c          | 50 ++++++++++++++++++++++++++-----------
 include/hw/pci/pci_device.h |  2 +-
 include/hw/virtio/virtio.h  |  2 ++
 include/system/dma.h        | 25 ++++++++++++++++++-
 system/dma-helpers.c        |  2 +-
 9 files changed, 77 insertions(+), 26 deletions(-)
diff mbox series

Patch

diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index 11a7a85750..afb9a8b69f 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -839,7 +839,7 @@  int virtio_gpu_create_mapping_iov(VirtIOGPU *g,
             len = l;
             map = dma_memory_map(VIRTIO_DEVICE(g)->dma_as, a, &len,
                                  DMA_DIRECTION_TO_DEVICE,
-                                 MEMTXATTRS_UNSPECIFIED);
+                                 MEMTXATTRS_UNSPECIFIED, NULL);
             if (!map) {
                 qemu_log_mask(LOG_GUEST_ERROR, "%s: failed to map MMIO memory for"
                               " element %d\n", __func__, e);
@@ -1258,7 +1258,8 @@  static bool virtio_gpu_load_restore_mapping(VirtIOGPU *g,
         hwaddr len = res->iov[i].iov_len;
         res->iov[i].iov_base =
             dma_memory_map(VIRTIO_DEVICE(g)->dma_as, res->addrs[i], &len,
-                           DMA_DIRECTION_TO_DEVICE, MEMTXATTRS_UNSPECIFIED);
+                           DMA_DIRECTION_TO_DEVICE, MEMTXATTRS_UNSPECIFIED,
+                           NULL);
 
         if (!res->iov[i].iov_base || len != res->iov[i].iov_len) {
             /* Clean up the half-a-mapping we just created... */
diff --git a/hw/hyperv/vmbus.c b/hw/hyperv/vmbus.c
index 12a7dc4312..c3308a1bfd 100644
--- a/hw/hyperv/vmbus.c
+++ b/hw/hyperv/vmbus.c
@@ -374,7 +374,7 @@  static ssize_t gpadl_iter_io(GpadlIter *iter, void *buf, uint32_t len)
             maddr = (iter->gpadl->gfns[idx] << TARGET_PAGE_BITS) | off_in_page;
 
             iter->map = dma_memory_map(iter->as, maddr, &mlen, iter->dir,
-                                       MEMTXATTRS_UNSPECIFIED);
+                                       MEMTXATTRS_UNSPECIFIED, NULL);
             if (mlen != pgleft) {
                 dma_memory_unmap(iter->as, iter->map, mlen, iter->dir, 0);
                 iter->map = NULL;
@@ -492,7 +492,8 @@  int vmbus_map_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov,
             }
 
             iov[ret_cnt].iov_base = dma_memory_map(sgl->as, a, &l, dir,
-                                                   MEMTXATTRS_UNSPECIFIED);
+                                                   MEMTXATTRS_UNSPECIFIED,
+                                                   NULL);
             if (!l) {
                 ret = -EFAULT;
                 goto err;
@@ -568,7 +569,8 @@  static vmbus_ring_buffer *ringbuf_map_hdr(VMBusRingBufCommon *ringbuf)
     dma_addr_t mlen = sizeof(*rb);
 
     rb = dma_memory_map(ringbuf->as, ringbuf->rb_addr, &mlen,
-                        DMA_DIRECTION_FROM_DEVICE, MEMTXATTRS_UNSPECIFIED);
+                        DMA_DIRECTION_FROM_DEVICE, MEMTXATTRS_UNSPECIFIED,
+                        NULL);
     if (mlen != sizeof(*rb)) {
         dma_memory_unmap(ringbuf->as, rb, mlen,
                          DMA_DIRECTION_FROM_DEVICE, 0);
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 1303c21cb7..aeea2dc61d 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -221,7 +221,7 @@  static void map_page(AddressSpace *as, uint8_t **ptr, uint64_t addr,
     }
 
     *ptr = dma_memory_map(as, addr, &len, DMA_DIRECTION_FROM_DEVICE,
-                          MEMTXATTRS_UNSPECIFIED);
+                          MEMTXATTRS_UNSPECIFIED, NULL);
     if (len < wanted && *ptr) {
         dma_memory_unmap(as, *ptr, len, DMA_DIRECTION_FROM_DEVICE, len);
         *ptr = NULL;
@@ -928,7 +928,7 @@  static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist,
     /* map PRDT */
     if (!(prdt = dma_memory_map(ad->hba->as, prdt_addr, &prdt_len,
                                 DMA_DIRECTION_TO_DEVICE,
-                                MEMTXATTRS_UNSPECIFIED))){
+                                MEMTXATTRS_UNSPECIFIED, NULL))) {
         trace_ahci_populate_sglist_no_map(ad->hba, ad->port_no);
         return -1;
     }
@@ -1338,7 +1338,8 @@  static void handle_cmd(AHCIState *s, int port, uint8_t slot)
     tbl_addr = le64_to_cpu(cmd->tbl_addr);
     cmd_len = 0x80;
     cmd_fis = dma_memory_map(s->as, tbl_addr, &cmd_len,
-                             DMA_DIRECTION_TO_DEVICE, MEMTXATTRS_UNSPECIFIED);
+                             DMA_DIRECTION_TO_DEVICE, MEMTXATTRS_UNSPECIFIED,
+                             NULL);
     if (!cmd_fis) {
         trace_handle_cmd_badfis(s, port);
         return;
diff --git a/hw/usb/libhw.c b/hw/usb/libhw.c
index 4f03ef4ba9..762d70b419 100644
--- a/hw/usb/libhw.c
+++ b/hw/usb/libhw.c
@@ -37,7 +37,7 @@  int usb_packet_map(USBPacket *p, QEMUSGList *sgl)
         while (len) {
             dma_addr_t xlen = len;
             mem = dma_memory_map(sgl->as, base, &xlen, dir,
-                                 MEMTXATTRS_UNSPECIFIED);
+                                 MEMTXATTRS_UNSPECIFIED, NULL);
             if (!mem) {
                 goto err;
             }
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 85110bce37..be756f3ac8 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -1553,9 +1553,9 @@  int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
 }
 
 static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
-                               hwaddr *addr, struct iovec *iov,
-                               unsigned int max_num_sg, bool is_write,
-                               hwaddr pa, size_t sz)
+                               hwaddr *addr, hwaddr *xlat_addr,
+                               struct iovec *iov, unsigned int max_num_sg,
+                               bool is_write, hwaddr pa, size_t sz)
 {
     bool ok = false;
     unsigned num_sg = *p_num_sg;
@@ -1579,7 +1579,8 @@  static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
                                               is_write ?
                                               DMA_DIRECTION_FROM_DEVICE :
                                               DMA_DIRECTION_TO_DEVICE,
-                                              MEMTXATTRS_UNSPECIFIED);
+                                              MEMTXATTRS_UNSPECIFIED,
+                                              &xlat_addr[num_sg]);
         if (!iov[num_sg].iov_base) {
             virtio_error(vdev, "virtio: bogus descriptor or out of resources");
             goto out;
@@ -1618,7 +1619,7 @@  static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
 
 static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
                                 hwaddr *addr, unsigned int num_sg,
-                                bool is_write)
+                                hwaddr *xlat_addr, bool is_write)
 {
     unsigned int i;
     hwaddr len;
@@ -1629,7 +1630,8 @@  static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
                                         addr[i], &len, is_write ?
                                         DMA_DIRECTION_FROM_DEVICE :
                                         DMA_DIRECTION_TO_DEVICE,
-                                        MEMTXATTRS_UNSPECIFIED);
+                                        MEMTXATTRS_UNSPECIFIED,
+                                        &xlat_addr[i]);
         if (!sg[i].iov_base) {
             error_report("virtio: error trying to map MMIO memory");
             exit(1);
@@ -1643,9 +1645,10 @@  static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
 
 void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
 {
-    virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, true);
+    virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num,
+                        elem->in_xlat_addr, true);
     virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num,
-                                                                        false);
+                        elem->out_xlat_addr, false);
 }
 
 static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
@@ -1654,7 +1657,14 @@  static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_nu
     size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
     size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
     size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
-    size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
+    size_t in_xlat_addr_ofs =
+        QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_xlat_addr[0]));
+    size_t out_xlat_addr_ofs = in_xlat_addr_ofs + in_num *
+                               sizeof(elem->in_xlat_addr[0]);
+    size_t out_xlat_addr_end = out_xlat_addr_ofs + out_num *
+                               sizeof(elem->out_xlat_addr[0]);
+    size_t in_sg_ofs =
+        QEMU_ALIGN_UP(out_xlat_addr_end, __alignof__(elem->in_sg[0]));
     size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
     size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
 
@@ -1665,6 +1675,8 @@  static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_nu
     elem->in_num = in_num;
     elem->in_addr = (void *)elem + in_addr_ofs;
     elem->out_addr = (void *)elem + out_addr_ofs;
+    elem->in_xlat_addr = (void *)elem + in_xlat_addr_ofs;
+    elem->out_xlat_addr = (void *)elem + out_xlat_addr_ofs;
     elem->in_sg = (void *)elem + in_sg_ofs;
     elem->out_sg = (void *)elem + out_sg_ofs;
     return elem;
@@ -1681,6 +1693,7 @@  static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
     VirtQueueElement *elem = NULL;
     unsigned out_num, in_num, elem_entries;
     hwaddr addr[VIRTQUEUE_MAX_SIZE];
+    hwaddr xlat_addr[VIRTQUEUE_MAX_SIZE];
     struct iovec iov[VIRTQUEUE_MAX_SIZE];
     VRingDesc desc;
     int rc;
@@ -1754,7 +1767,7 @@  static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
 
         if (desc.flags & VRING_DESC_F_WRITE) {
             map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
-                                        iov + out_num,
+                                        xlat_addr + out_num, iov + out_num,
                                         VIRTQUEUE_MAX_SIZE - out_num, true,
                                         desc.addr, desc.len);
         } else {
@@ -1762,8 +1775,8 @@  static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
                 virtio_error(vdev, "Incorrect order for descriptors");
                 goto err_undo_map;
             }
-            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
-                                        VIRTQUEUE_MAX_SIZE, false,
+            map_ok = virtqueue_map_desc(vdev, &out_num, addr, xlat_addr,
+                                        iov, VIRTQUEUE_MAX_SIZE, false,
                                         desc.addr, desc.len);
         }
         if (!map_ok) {
@@ -1790,10 +1803,12 @@  static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
     for (i = 0; i < out_num; i++) {
         elem->out_addr[i] = addr[i];
         elem->out_sg[i] = iov[i];
+        elem->out_xlat_addr[i] = xlat_addr[i];
     }
     for (i = 0; i < in_num; i++) {
         elem->in_addr[i] = addr[out_num + i];
         elem->in_sg[i] = iov[out_num + i];
+        elem->in_xlat_addr[i] = xlat_addr[out_num + i];
     }
 
     if (virtio_vdev_has_feature(vdev, VIRTIO_F_IN_ORDER)) {
@@ -1827,6 +1842,7 @@  static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
     VirtQueueElement *elem = NULL;
     unsigned out_num, in_num, elem_entries;
     hwaddr addr[VIRTQUEUE_MAX_SIZE];
+    hwaddr xlat_addr[VIRTQUEUE_MAX_SIZE];
     struct iovec iov[VIRTQUEUE_MAX_SIZE];
     VRingPackedDesc desc;
     uint16_t id;
@@ -1891,7 +1907,7 @@  static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
 
         if (desc.flags & VRING_DESC_F_WRITE) {
             map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
-                                        iov + out_num,
+                                        xlat_addr + out_num, iov + out_num,
                                         VIRTQUEUE_MAX_SIZE - out_num, true,
                                         desc.addr, desc.len);
         } else {
@@ -1899,7 +1915,7 @@  static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
                 virtio_error(vdev, "Incorrect order for descriptors");
                 goto err_undo_map;
             }
-            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
+            map_ok = virtqueue_map_desc(vdev, &out_num, addr, xlat_addr, iov,
                                         VIRTQUEUE_MAX_SIZE, false,
                                         desc.addr, desc.len);
         }
@@ -1928,10 +1944,12 @@  static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
     for (i = 0; i < out_num; i++) {
         elem->out_addr[i] = addr[i];
         elem->out_sg[i] = iov[i];
+        elem->out_xlat_addr[i] = xlat_addr[i];
     }
     for (i = 0; i < in_num; i++) {
         elem->in_addr[i] = addr[out_num + i];
         elem->in_sg[i] = iov[out_num + i];
+        elem->in_xlat_addr[i] = xlat_addr[out_num + i];
     }
 
     elem->index = id;
@@ -2117,10 +2135,14 @@  void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
     elem->index = data.index;
 
     for (i = 0; i < elem->in_num; i++) {
+        /* xlat_addr is overwritten by virtqueue_map */
+        elem->in_xlat_addr[i] = 0;
         elem->in_addr[i] = data.in_addr[i];
     }
 
     for (i = 0; i < elem->out_num; i++) {
+        /* xlat_addr is overwritten by virtqueue_map */
+        elem->out_xlat_addr[i] = 0;
         elem->out_addr[i] = data.out_addr[i];
     }
 
diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
index 8eaf0d58bb..e2bb453dcc 100644
--- a/include/hw/pci/pci_device.h
+++ b/include/hw/pci/pci_device.h
@@ -328,7 +328,7 @@  static inline void *pci_dma_map(PCIDevice *dev, dma_addr_t addr,
                                 dma_addr_t *plen, DMADirection dir)
 {
     return dma_memory_map(pci_get_address_space(dev), addr, plen, dir,
-                          MEMTXATTRS_UNSPECIFIED);
+                          MEMTXATTRS_UNSPECIFIED, NULL);
 }
 
 static inline void pci_dma_unmap(PCIDevice *dev, void *buffer, dma_addr_t len,
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 6386910280..e822aafd91 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -75,6 +75,8 @@  typedef struct VirtQueueElement
     hwaddr *out_addr;
     struct iovec *in_sg;
     struct iovec *out_sg;
+    hwaddr *in_xlat_addr;
+    hwaddr *out_xlat_addr;
 } VirtQueueElement;
 
 #define VIRTIO_QUEUE_MAX 1024
diff --git a/include/system/dma.h b/include/system/dma.h
index 5a49a30628..b5d4c07452 100644
--- a/include/system/dma.h
+++ b/include/system/dma.h
@@ -12,6 +12,7 @@ 
 
 #include "exec/memory.h"
 #include "exec/address-spaces.h"
+#include "exec/ramblock.h"
 #include "block/block.h"
 #include "block/accounting.h"
 
@@ -201,10 +202,12 @@  MemTxResult dma_memory_set(AddressSpace *as, dma_addr_t addr,
  * @len: pointer to length of buffer; updated on return
  * @dir: indicates the transfer direction
  * @attrs: memory attributes
+ * @guest_addr: optional output for GPA
  */
 static inline void *dma_memory_map(AddressSpace *as,
                                    dma_addr_t addr, dma_addr_t *len,
-                                   DMADirection dir, MemTxAttrs attrs)
+                                   DMADirection dir, MemTxAttrs attrs,
+                                   hwaddr *guest_addr)
 {
     hwaddr xlen = *len;
     void *p;
@@ -212,6 +215,26 @@  static inline void *dma_memory_map(AddressSpace *as,
     p = address_space_map(as, addr, &xlen, dir == DMA_DIRECTION_FROM_DEVICE,
                           attrs);
     *len = xlen;
+
+    /* Attempt to find a backing GPA for this HVA */
+    if (guest_addr) {
+        if (p) {
+            RAMBlock *rb;
+            ram_addr_t offset;
+
+            rb = qemu_ram_block_from_host(p, false, &offset);
+            if (rb) {
+                /* HVA corresponds to guest memory */
+                *guest_addr = rb->offset + offset;
+            } else {
+                /* HVA doesn't correspond to guest memory */
+                *guest_addr = 0;
+            }
+        } else {
+            /* Mapping failed */
+            *guest_addr = 0;
+        }
+    }
     return p;
 }
 
diff --git a/system/dma-helpers.c b/system/dma-helpers.c
index f6403242f5..a6d2352c0f 100644
--- a/system/dma-helpers.c
+++ b/system/dma-helpers.c
@@ -135,7 +135,7 @@  static void dma_blk_cb(void *opaque, int ret)
         cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
         cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
         mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir,
-                             MEMTXATTRS_UNSPECIFIED);
+                             MEMTXATTRS_UNSPECIFIED, NULL);
         /*
          * Make reads deterministic in icount mode. Windows sometimes issues
          * disk read requests with overlapping SGs. It leads