@@ -2,6 +2,7 @@
#define VHOST_H
#include "hw/virtio/vhost-backend.h"
+#include "hw/virtio/vhost-iova-tree.h"
#include "hw/virtio/virtio.h"
#include "exec/memory.h"
@@ -88,6 +89,8 @@ struct vhost_dev {
bool log_enabled;
bool shadow_vqs_enabled;
uint64_t log_size;
+ /* IOVA mapping used by Shadow Virtqueue */
+ VhostIOVATree iova_map;
struct {
hwaddr first;
hwaddr last;
@@ -1013,31 +1013,45 @@ static int vhost_memory_region_lookup(struct vhost_dev *hdev,
int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write)
{
- IOMMUTLBEntry iotlb;
+ IOMMUAccessFlags perm;
uint64_t uaddr, len;
int ret = -EFAULT;
- RCU_READ_LOCK_GUARD();
-
trace_vhost_iotlb_miss(dev, 1);
if (dev->shadow_vqs_enabled) {
- uaddr = iova;
- len = 4096;
- ret = vhost_backend_update_device_iotlb(dev, iova, uaddr, len,
- IOMMU_RW);
- if (ret) {
- trace_vhost_iotlb_miss(dev, 2);
- error_report("Fail to update device iotlb");
+ /* The shadow virtqueue translates addresses in its own Virtual Address Space */
+ const VhostDMAMap *result;
+ const VhostDMAMap needle = {
+ .iova = iova,
+ };
+
+ result = vhost_iova_tree_find_taddr(&dev->iova_map, &needle);
+
+ if (unlikely(!result)) {
+ goto out;
}
- return ret;
- }
+ iova = result->iova;
+ uaddr = (uint64_t)result->translated_addr;
+ /*
+ * In the IOVA tree, result->iova + result->size is the last byte of the
+ * range; vhost expects len to be one past that last byte.
+ */
+ len = result->size + 1;
+ perm = result->perm;
+ } else {
+ IOMMUTLBEntry iotlb;
+
+ RCU_READ_LOCK_GUARD();
+ iotlb = address_space_get_iotlb_entry(dev->vdev->dma_as,
+ iova, write,
+ MEMTXATTRS_UNSPECIFIED);
+
+ if (iotlb.target_as == NULL) {
+ goto out;
+ }
- iotlb = address_space_get_iotlb_entry(dev->vdev->dma_as,
- iova, write,
- MEMTXATTRS_UNSPECIFIED);
- if (iotlb.target_as != NULL) {
ret = vhost_memory_region_lookup(dev, iotlb.translated_addr,
&uaddr, &len);
if (ret) {
@@ -1049,14 +1063,14 @@ int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write)
len = MIN(iotlb.addr_mask + 1, len);
iova = iova & ~iotlb.addr_mask;
+ perm = iotlb.perm;
+ }
- ret = vhost_backend_update_device_iotlb(dev, iova, uaddr,
- len, iotlb.perm);
- if (ret) {
- trace_vhost_iotlb_miss(dev, 4);
- error_report("Fail to update device iotlb");
- goto out;
- }
+ ret = vhost_backend_update_device_iotlb(dev, iova, uaddr, len, perm);
+ if (ret) {
+ trace_vhost_iotlb_miss(dev, 4);
+ error_report("Fail to update device iotlb");
+ goto out;
}
trace_vhost_iotlb_miss(dev, 2);
@@ -1249,7 +1263,7 @@ static int vhost_sw_live_migration_stop(struct vhost_dev *dev)
if (r) {
error_report("Fail to invalidate device iotlb");
}
-
+ vhost_iova_tree_destroy(&dev->iova_map);
for (idx = 0; idx < dev->nvqs; ++idx) {
struct vhost_virtqueue *vq = dev->vqs + idx;
if (vhost_dev_has_iommu(dev) &&
@@ -1279,6 +1293,26 @@ static int vhost_sw_live_migration_stop(struct vhost_dev *dev)
return 0;
}
+static bool vhost_shadow_vq_start_store_sections(struct vhost_dev *dev)
+{
+ int idx;
+
+ for (idx = 0; idx < dev->n_mem_sections; ++idx) {
+ size_t region_size = dev->mem->regions[idx].memory_size;
+ VhostDMAMap region = {
+ .iova = dev->mem->regions[idx].userspace_addr,
+ .translated_addr = (void *)dev->mem->regions[idx].userspace_addr,
+ .size = region_size - 1,
+ .perm = VHOST_ACCESS_RW,
+ };
+
+ VhostDMAMapNewRC r = vhost_iova_tree_insert(&dev->iova_map, &region);
+ assert(r == VHOST_DMA_MAP_OK);
+ }
+
+ return true;
+}
+
/*
* Start shadow virtqueue in a given queue.
* In failure case, this function leaves queue working as regular vhost mode.
@@ -1292,9 +1326,37 @@ static bool vhost_sw_live_migration_start_vq(struct vhost_dev *dev,
struct vhost_vring_state s = {
.index = idx,
};
+ VhostDMAMap driver_region, device_region;
+
int r;
bool ok;
+ assert(dev->shadow_vqs[idx] != NULL);
+ vhost_shadow_vq_get_vring_addr(dev->shadow_vqs[idx], &addr);
+ driver_region = (VhostDMAMap) {
+ .iova = addr.desc_user_addr,
+ .translated_addr = (void *)addr.desc_user_addr,
+
+ /*
+ * VhostDMAMap.size includes the last byte of the range, while
+ * sizeof marks one past it. Subtract one byte to make them match.
+ */
+ .size = vhost_shadow_vq_driver_area_size(dev->shadow_vqs[idx]) - 1,
+ .perm = VHOST_ACCESS_RO,
+ };
+ device_region = (VhostDMAMap) {
+ .iova = addr.used_user_addr,
+ .translated_addr = (void *)addr.used_user_addr,
+ .size = vhost_shadow_vq_device_area_size(dev->shadow_vqs[idx]) - 1,
+ .perm = VHOST_ACCESS_RW,
+ };
+
+ r = vhost_iova_tree_insert(&dev->iova_map, &driver_region);
+ assert(r == VHOST_DMA_MAP_OK);
+
+ r = vhost_iova_tree_insert(&dev->iova_map, &device_region);
+ assert(r == VHOST_DMA_MAP_OK);
+
vhost_virtqueue_stop(dev, dev->vdev, &dev->vqs[idx], dev->vq_index + idx);
ok = vhost_shadow_vq_start(dev, idx, dev->shadow_vqs[idx]);
if (unlikely(!ok)) {
@@ -1302,7 +1364,6 @@ static bool vhost_sw_live_migration_start_vq(struct vhost_dev *dev,
}
/* From this point, vhost_virtqueue_start can reset these changes */
- vhost_shadow_vq_get_vring_addr(dev->shadow_vqs[idx], &addr);
r = dev->vhost_ops->vhost_set_vring_addr(dev, &addr);
if (unlikely(r != 0)) {
VHOST_OPS_DEBUG("vhost_set_vring_addr for shadow vq failed");
@@ -1315,6 +1376,7 @@ static bool vhost_sw_live_migration_start_vq(struct vhost_dev *dev,
goto err;
}
+
if (vhost_dev_has_iommu(dev) && dev->vhost_ops->vhost_set_iotlb_callback) {
/*
* Update used ring information for IOTLB to work correctly,
@@ -1357,6 +1419,15 @@ static int vhost_sw_live_migration_start(struct vhost_dev *dev)
error_report("Fail to invalidate device iotlb");
}
+ /*
+ * Create new iova mappings. SVQ always exposes qemu's VA.
+ * TODO: fine-tune the exported mapping; default vhost does not expose
+ * everything.
+ */
+
+ vhost_iova_tree_new(&dev->iova_map);
+ vhost_shadow_vq_start_store_sections(dev);
+
/* Can be read by vhost_virtqueue_mask, from vm exit */
dev->shadow_vqs_enabled = true;
for (idx = 0; idx < dev->nvqs; ++idx) {
At the moment, the tree is only used to store 1:1 maps of the qemu
virtual addresses of the shadow virtqueue vrings and of the guest's
addresses. In other words, the tree only serves to check that the
address the guest exposed is valid at the moment qemu receives the
miss. It does not yet work if the device has restrictions on its
iova range.

Updates to the tree are protected by the BQL: each one always runs
from the main event loop context, and vhost_device_iotlb_miss runs
in the same context when reading the tree.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
 include/hw/virtio/vhost.h |   3 +
 hw/virtio/vhost.c         | 121 ++++++++++++++++++++++++++++++--------
 2 files changed, 99 insertions(+), 25 deletions(-)
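
A note on the size convention, with a minimal sketch of how an iotlb miss
resolves against such 1:1 maps. The dma_map struct and the linear find_map()
below are simplified stand-ins for the VhostDMAMap / VhostIOVATree API, not
the patch's implementation; only the arithmetic (the last byte is stored
inclusively, one past it is reported to vhost) follows the code above.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Simplified stand-in for VhostDMAMap: size is the offset of the LAST
 * byte of the range (inclusive), matching the IOVA tree convention. */
struct dma_map {
    uint64_t iova;            /* start of the guest-visible range */
    uint64_t translated_addr; /* qemu VA it maps to (1:1 in this patch) */
    uint64_t size;            /* range is [iova, iova + size], inclusive */
};

/* Linear-search stand-in for vhost_iova_tree_find_taddr(). */
static const struct dma_map *find_map(const struct dma_map *maps, size_t n,
                                      uint64_t iova)
{
    for (size_t i = 0; i < n; i++) {
        if (iova >= maps[i].iova && iova - maps[i].iova <= maps[i].size) {
            return &maps[i];
        }
    }
    return NULL; /* miss: the guest exposed an address with no mapping */
}

int main(void)
{
    /* One 4 KiB identity map: iova == translated_addr, size = 4096 - 1. */
    const struct dma_map maps[] = {
        { .iova = 0x10000, .translated_addr = 0x10000, .size = 4096 - 1 },
    };

    const struct dma_map *m = find_map(maps, 1, 0x10800);
    assert(m);

    /* vhost wants one past the last byte, hence the "size + 1" above. */
    assert(m->translated_addr == 0x10000 && m->size + 1 == 4096);

    /* An address outside every region is rejected, which is all the
     * tree is used for at this point. */
    assert(!find_map(maps, 1, 0x20000));
    return 0;
}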
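
The BQL assumption in the last paragraph could also be made explicit in a
debug build. The guard below is not part of the patch, just a hypothetical
assertion one could call at dev->iova_map access sites, using qemu's
existing qemu_mutex_iothread_locked() helper:

#include "qemu/osdep.h"
#include "qemu/main-loop.h"

/* Hypothetical, not in the patch: document that dev->iova_map is only
 * touched from the main event loop, under the Big QEMU Lock. */
static void vhost_iova_map_assert_locked(void)
{
    assert(qemu_mutex_iothread_locked());
}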