Message ID | 1458872009-13342-2-git-send-email-jasowang@redhat.com (mailing list archive)
---|---
State | New, archived
On Fri, Mar 25, 2016 at 10:13:22AM +0800, Jason Wang wrote:
> Currently, all virtio devices bypass IOMMU completely. This is because
> address_space_memory is assumed and used during DMA emulation. This
> patch converts the virtio core API to use DMA API. This idea is
>
> - introducing a new transport specific helper to query the dma address
>   space. (only pci version is implemented).
> - query and use this address space during virtio device guest memory
>   accessing
>
> With this virtio devices will not bypass IOMMU anymore. Tested with
> intel_iommu=on/strict with:
>
> - virtio guest DMA series posted in https://lkml.org/lkml/2015/10/28/64.
> - vfio (unsafe interrupt mode) dpdk l2fwd in guest
>
> TODO:
> - Feature bit for this
> - Implement this for all transports

Nice. The only thing that worries me here is that ring addresses are
only translated once, at DRIVER_OK. In theory, rings could be
non-contiguous in guest memory and mapped to contiguous ranges of bus
addresses by the IOMMU. That might be useful for very large rings or
memory-constrained guests.

Thoughts? Is this worth worrying about?
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index cb710f1..9411f99 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -829,7 +829,7 @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f,
 
     while (qemu_get_sbyte(f)) {
         VirtIOBlockReq *req;
-        req = qemu_get_virtqueue_element(f, sizeof(VirtIOBlockReq));
+        req = qemu_get_virtqueue_element(vdev, f, sizeof(VirtIOBlockReq));
         virtio_blk_init_request(s, req);
         req->next = s->rq;
         s->rq = req;
diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c
index 99cb683..bdc5393 100644
--- a/hw/char/virtio-serial-bus.c
+++ b/hw/char/virtio-serial-bus.c
@@ -687,6 +687,7 @@ static void virtio_serial_post_load_timer_cb(void *opaque)
 static int fetch_active_ports_list(QEMUFile *f, int version_id,
                                    VirtIOSerial *s, uint32_t nr_active_ports)
 {
+    VirtIODevice *vdev = VIRTIO_DEVICE(s);
     uint32_t i;
 
     s->post_load = g_malloc0(sizeof(*s->post_load));
@@ -722,7 +723,7 @@ static int fetch_active_ports_list(QEMUFile *f, int version_id,
             qemu_get_be64s(f, &port->iov_offset);
 
             port->elem =
-                qemu_get_virtqueue_element(f, sizeof(VirtQueueElement));
+                qemu_get_virtqueue_element(vdev, f, sizeof(VirtQueueElement));
 
             /*
              * Port was throttled on source machine.  Let's
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 0c30d2e..26ce701 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -196,12 +196,14 @@ static void *virtio_scsi_load_request(QEMUFile *f, SCSIRequest *sreq)
     SCSIBus *bus = sreq->bus;
     VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus);
     VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
+    VirtIODevice *vdev = VIRTIO_DEVICE(s);
     VirtIOSCSIReq *req;
     uint32_t n;
 
     qemu_get_be32s(f, &n);
     assert(n < vs->conf.num_queues);
-    req = qemu_get_virtqueue_element(f, sizeof(VirtIOSCSIReq) + vs->cdb_size);
+    req = qemu_get_virtqueue_element(vdev, f,
+                                     sizeof(VirtIOSCSIReq) + vs->cdb_size);
     virtio_scsi_init_req(s, vs->cmd_vqs[n], req);
 
     if (virtio_scsi_parse_req(req, sizeof(VirtIOSCSICmdReq) + vs->cdb_size,
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 0dadb66..5508b1c 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1211,6 +1211,14 @@ static int virtio_pci_query_nvectors(DeviceState *d)
     return proxy->nvectors;
 }
 
+static AddressSpace *virtio_pci_get_dma_as(DeviceState *d)
+{
+    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
+    PCIDevice *dev = &proxy->pci_dev;
+
+    return pci_get_address_space(dev);
+}
+
 static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy,
                                   struct virtio_pci_cap *cap)
 {
@@ -2495,6 +2503,7 @@ static void virtio_pci_bus_class_init(ObjectClass *klass, void *data)
     k->device_plugged = virtio_pci_device_plugged;
     k->device_unplugged = virtio_pci_device_unplugged;
     k->query_nvectors = virtio_pci_query_nvectors;
+    k->get_dma_as = virtio_pci_get_dma_as;
 }
 
 static const TypeInfo virtio_pci_bus_info = {
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 08275a9..37c9951 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -21,6 +21,7 @@
 #include "hw/virtio/virtio-bus.h"
 #include "migration/migration.h"
 #include "hw/virtio/virtio-access.h"
+#include "sysemu/dma.h"
 
 /*
  * The alignment to use between consumer and producer parts of vring.
@@ -118,7 +119,7 @@ void virtio_queue_update_rings(VirtIODevice *vdev, int n)
 static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
                             hwaddr desc_pa, int i)
 {
-    address_space_read(&address_space_memory, desc_pa + i * sizeof(VRingDesc),
+    address_space_read(virtio_get_dma_as(vdev), desc_pa + i * sizeof(VRingDesc),
                        MEMTXATTRS_UNSPECIFIED, (void *)desc, sizeof(VRingDesc));
     virtio_tswap64s(vdev, &desc->addr);
     virtio_tswap32s(vdev, &desc->len);
@@ -160,7 +161,7 @@ static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
     virtio_tswap32s(vq->vdev, &uelem->id);
     virtio_tswap32s(vq->vdev, &uelem->len);
     pa = vq->vring.used + offsetof(VRingUsed, ring[i]);
-    address_space_write(&address_space_memory, pa, MEMTXATTRS_UNSPECIFIED,
+    address_space_write(virtio_get_dma_as(vq->vdev), pa, MEMTXATTRS_UNSPECIFIED,
                         (void *)uelem, sizeof(VRingUsedElem));
 }
 
@@ -240,6 +241,7 @@ int virtio_queue_empty(VirtQueue *vq)
 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
                                unsigned int len)
 {
+    AddressSpace *dma_as = virtio_get_dma_as(vq->vdev);
     unsigned int offset;
     int i;
 
@@ -247,17 +249,17 @@ static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
     for (i = 0; i < elem->in_num; i++) {
         size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
 
-        cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
-                                  elem->in_sg[i].iov_len,
-                                  1, size);
+        dma_memory_unmap(dma_as, elem->in_sg[i].iov_base, elem->in_sg[i].iov_len,
+                         DMA_DIRECTION_FROM_DEVICE, size);
 
         offset += size;
     }
 
     for (i = 0; i < elem->out_num; i++)
-        cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
-                                  elem->out_sg[i].iov_len,
-                                  0, elem->out_sg[i].iov_len);
+        dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
+                         elem->out_sg[i].iov_len,
+                         DMA_DIRECTION_TO_DEVICE,
+                         elem->out_sg[i].iov_len);
 }
 
 void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
@@ -447,7 +449,8 @@ int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
     return in_bytes <= in_total && out_bytes <= out_total;
 }
 
-static void virtqueue_map_desc(unsigned int *p_num_sg, hwaddr *addr, struct iovec *iov,
+static void virtqueue_map_desc(VirtIODevice *vdev,
+                               unsigned int *p_num_sg, hwaddr *addr, struct iovec *iov,
                                unsigned int max_num_sg, bool is_write,
                                hwaddr pa, size_t sz)
 {
@@ -462,7 +465,10 @@ static void virtqueue_map_desc(unsigned int *p_num_sg, hwaddr *addr, struct iove
         exit(1);
     }
 
-    iov[num_sg].iov_base = cpu_physical_memory_map(pa, &len, is_write);
+    iov[num_sg].iov_base = dma_memory_map(virtio_get_dma_as(vdev), pa, &len,
+                                          is_write ?
+                                          DMA_DIRECTION_FROM_DEVICE:
+                                          DMA_DIRECTION_TO_DEVICE);
     iov[num_sg].iov_len = len;
     addr[num_sg] = pa;
 
@@ -473,9 +479,9 @@ static void virtqueue_map_desc(unsigned int *p_num_sg, hwaddr *addr, struct iove
     *p_num_sg = num_sg;
 }
 
-static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr,
-                                unsigned int *num_sg, unsigned int max_size,
-                                int is_write)
+static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
+                                hwaddr *addr, unsigned int *num_sg,
+                                unsigned int max_size, int is_write)
 {
     unsigned int i;
     hwaddr len;
@@ -494,7 +500,10 @@ static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr,
 
     for (i = 0; i < *num_sg; i++) {
         len = sg[i].iov_len;
-        sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
+        sg[i].iov_base = dma_memory_map(virtio_get_dma_as(vdev),
+                                        addr[i], &len, is_write ?
+                                        DMA_DIRECTION_FROM_DEVICE :
+                                        DMA_DIRECTION_TO_DEVICE);
         if (!sg[i].iov_base) {
             error_report("virtio: error trying to map MMIO memory");
             exit(1);
@@ -506,12 +515,15 @@ static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr,
     }
 }
 
-void virtqueue_map(VirtQueueElement *elem)
+void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
 {
-    virtqueue_map_iovec(elem->in_sg, elem->in_addr, &elem->in_num,
-                        VIRTQUEUE_MAX_SIZE, 1);
-    virtqueue_map_iovec(elem->out_sg, elem->out_addr, &elem->out_num,
-                        VIRTQUEUE_MAX_SIZE, 0);
+    virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, &elem->in_num,
+                        MIN(ARRAY_SIZE(elem->in_sg), ARRAY_SIZE(elem->in_addr)),
+                        1);
+    virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, &elem->out_num,
+                        MIN(ARRAY_SIZE(elem->out_sg),
+                            ARRAY_SIZE(elem->out_addr)),
+                        0);
 }
 
 void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
@@ -580,14 +592,14 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz)
     /* Collect all the descriptors */
     do {
         if (desc.flags & VRING_DESC_F_WRITE) {
-            virtqueue_map_desc(&in_num, addr + out_num, iov + out_num,
+            virtqueue_map_desc(vdev, &in_num, addr + out_num, iov + out_num,
                                VIRTQUEUE_MAX_SIZE - out_num, true, desc.addr, desc.len);
         } else {
             if (in_num) {
                 error_report("Incorrect order for descriptors");
                 exit(1);
             }
-            virtqueue_map_desc(&out_num, addr, iov,
+            virtqueue_map_desc(vdev, &out_num, addr, iov,
                                VIRTQUEUE_MAX_SIZE, false, desc.addr, desc.len);
         }
 
@@ -633,7 +645,7 @@ typedef struct VirtQueueElementOld {
     struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
 } VirtQueueElementOld;
 
-void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz)
+void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
 {
     VirtQueueElement *elem;
     VirtQueueElementOld data;
@@ -664,7 +676,7 @@ void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz)
         elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
     }
 
-    virtqueue_map(elem);
+    virtqueue_map(vdev, elem);
 
     return elem;
 }
diff --git a/include/hw/virtio/virtio-access.h b/include/hw/virtio/virtio-access.h
index 8dc84f5..967cc75 100644
--- a/include/hw/virtio/virtio-access.h
+++ b/include/hw/virtio/virtio-access.h
@@ -15,8 +15,20 @@
 #ifndef _QEMU_VIRTIO_ACCESS_H
 #define _QEMU_VIRTIO_ACCESS_H
 #include "hw/virtio/virtio.h"
+#include "hw/virtio/virtio-bus.h"
 #include "exec/address-spaces.h"
 
+static inline AddressSpace *virtio_get_dma_as(VirtIODevice *vdev)
+{
+    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
+    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
+
+    if (k->get_dma_as) {
+        return k->get_dma_as(qbus->parent);
+    }
+    return &address_space_memory;
+}
+
 static inline bool virtio_access_is_big_endian(VirtIODevice *vdev)
 {
 #if defined(TARGET_IS_BIENDIAN)
@@ -34,45 +46,55 @@ static inline bool virtio_access_is_big_endian(VirtIODevice *vdev)
 
 static inline uint16_t virtio_lduw_phys(VirtIODevice *vdev, hwaddr pa)
 {
+    AddressSpace *dma_as = virtio_get_dma_as(vdev);
+
     if (virtio_access_is_big_endian(vdev)) {
-        return lduw_be_phys(&address_space_memory, pa);
+        return lduw_be_phys(dma_as, pa);
     }
-    return lduw_le_phys(&address_space_memory, pa);
+    return lduw_le_phys(dma_as, pa);
 }
 
 static inline uint32_t virtio_ldl_phys(VirtIODevice *vdev, hwaddr pa)
 {
+    AddressSpace *dma_as = virtio_get_dma_as(vdev);
+
     if (virtio_access_is_big_endian(vdev)) {
-        return ldl_be_phys(&address_space_memory, pa);
+        return ldl_be_phys(dma_as, pa);
     }
-    return ldl_le_phys(&address_space_memory, pa);
+    return ldl_le_phys(dma_as, pa);
 }
 
 static inline uint64_t virtio_ldq_phys(VirtIODevice *vdev, hwaddr pa)
 {
+    AddressSpace *dma_as = virtio_get_dma_as(vdev);
+
     if (virtio_access_is_big_endian(vdev)) {
-        return ldq_be_phys(&address_space_memory, pa);
+        return ldq_be_phys(dma_as, pa);
     }
-    return ldq_le_phys(&address_space_memory, pa);
+    return ldq_le_phys(dma_as, pa);
 }
 
 static inline void virtio_stw_phys(VirtIODevice *vdev, hwaddr pa,
                                    uint16_t value)
 {
+    AddressSpace *dma_as = virtio_get_dma_as(vdev);
+
     if (virtio_access_is_big_endian(vdev)) {
-        stw_be_phys(&address_space_memory, pa, value);
+        stw_be_phys(dma_as, pa, value);
     } else {
-        stw_le_phys(&address_space_memory, pa, value);
+        stw_le_phys(dma_as, pa, value);
     }
 }
 
 static inline void virtio_stl_phys(VirtIODevice *vdev, hwaddr pa,
                                    uint32_t value)
 {
+    AddressSpace *dma_as = virtio_get_dma_as(vdev);
+
     if (virtio_access_is_big_endian(vdev)) {
-        stl_be_phys(&address_space_memory, pa, value);
+        stl_be_phys(dma_as, pa, value);
     } else {
-        stl_le_phys(&address_space_memory, pa, value);
+        stl_le_phys(dma_as, pa, value);
     }
 }
 
diff --git a/include/hw/virtio/virtio-bus.h b/include/hw/virtio/virtio-bus.h
index 3f2c136..17c07af 100644
--- a/include/hw/virtio/virtio-bus.h
+++ b/include/hw/virtio/virtio-bus.h
@@ -76,6 +76,7 @@ typedef struct VirtioBusClass {
      * Note that changing this will break migration for this transport.
      */
     bool has_variable_vring_alignment;
+    AddressSpace *(*get_dma_as)(DeviceState *d);
 } VirtioBusClass;
 
 struct VirtioBusState {
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 2b5b248..0908bf6 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -153,9 +153,9 @@ void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                     unsigned int len, unsigned int idx);
 
-void virtqueue_map(VirtQueueElement *elem);
+void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem);
 void *virtqueue_pop(VirtQueue *vq, size_t sz);
-void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz);
+void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz);
 void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem);
 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
                           unsigned int out_bytes);
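A note on the direction convention used by the conversion above: the boolean
is_write argument of cpu_physical_memory_map()/cpu_physical_memory_unmap() is
replaced by an explicit DMADirection. Spelled out as a hypothetical helper
(virtio_dma_direction is a made-up name for illustration; the patch simply
open-codes the ternary):

/*
 * Hypothetical helper, not part of the patch: "is_write" in
 * virtqueue_map_desc()/virtqueue_map_iovec() means the device writes into
 * the buffer (an in_sg element), i.e. data flows FROM the device into guest
 * memory; out_sg buffers are only read by the device, i.e. data flows TO
 * the device.
 */
static inline DMADirection virtio_dma_direction(bool is_write)
{
    return is_write ? DMA_DIRECTION_FROM_DEVICE : DMA_DIRECTION_TO_DEVICE;
}

This is why virtqueue_unmap_sg() unmaps in_sg elements with
DMA_DIRECTION_FROM_DEVICE and out_sg elements with DMA_DIRECTION_TO_DEVICE.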
Currently, all virtio devices bypass the IOMMU completely. This is because
address_space_memory is assumed and used during DMA emulation. This patch
converts the virtio core API to use the DMA API. The idea is:

- introduce a new transport-specific helper to query the DMA address space
  (only the PCI version is implemented);
- query and use this address space during virtio device guest memory
  accesses.

With this, virtio devices no longer bypass the IOMMU. Tested with
intel_iommu=on/strict with:

- the virtio guest DMA series posted at https://lkml.org/lkml/2015/10/28/64
- vfio (unsafe interrupt mode) dpdk l2fwd in the guest

TODO:
- Feature bit for this
- Implement this for all transports

Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Amit Shah <amit.shah@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: qemu-block@nongnu.org
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/block/virtio-blk.c             |  2 +-
 hw/char/virtio-serial-bus.c       |  3 +-
 hw/scsi/virtio-scsi.c             |  4 ++-
 hw/virtio/virtio-pci.c            |  9 ++++++
 hw/virtio/virtio.c                | 58 +++++++++++++++++++++++----------------
 include/hw/virtio/virtio-access.h | 42 +++++++++++++++++++++-------
 include/hw/virtio/virtio-bus.h    |  1 +
 include/hw/virtio/virtio.h        |  4 +--
 8 files changed, 85 insertions(+), 38 deletions(-)
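As a usage illustration of the second point above (sketch only;
virtio_example_read_guest is an invented name, the call pattern mirrors
vring_desc_read() in the patch): once the core queries the transport's DMA
address space, a device-side access to guest memory goes through that space,
so a virtio-pci device behind an IOMMU sees IOVAs translated by the IOMMU
region, while transports that do not implement get_dma_as keep the old
address_space_memory behaviour via the fallback in virtio_get_dma_as().

/*
 * Sketch only (function name invented for illustration): read a guest
 * buffer through the transport's DMA address space, as the patch does for
 * vring descriptors.  With virtio-pci behind an IOMMU, "addr" is an IOVA;
 * without a get_dma_as hook, virtio_get_dma_as() falls back to
 * address_space_memory and behaviour is unchanged.
 */
static void virtio_example_read_guest(VirtIODevice *vdev, hwaddr addr,
                                      void *buf, size_t len)
{
    address_space_read(virtio_get_dma_as(vdev), addr,
                       MEMTXATTRS_UNSPECIFIED, buf, len);
}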