@@ -24,5 +24,10 @@ const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *iova_tree,
int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map,
hwaddr taddr);
void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map);
+const DMAMap *vhost_iova_tree_find_gpa(const VhostIOVATree *iova_tree,
+ const DMAMap *map);
+int vhost_iova_tree_map_alloc_gpa(VhostIOVATree *iova_tree, DMAMap *map,
+ hwaddr taddr);
+void vhost_iova_tree_remove_gpa(VhostIOVATree *iova_tree, DMAMap map);
#endif
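For reference, a minimal usage sketch of the three helpers declared above. This is not part of the patch: the helper name is hypothetical and it assumes compilation inside the QEMU tree, where "hw/virtio/vhost-iova-tree.h" already pulls in DMAMap, IOMMU_RW and the IOVA_* return codes.

#include "qemu/osdep.h"
#include "hw/virtio/vhost-iova-tree.h"

/* Hypothetical helper: map a GPA range, translate it back, then unmap it. */
static void vhost_iova_tree_gpa_roundtrip(VhostIOVATree *tree,
                                          hwaddr gpa, hwaddr len)
{
    const DMAMap *found;
    DMAMap needle;
    DMAMap map = {
        .size = len - 1,         /* DMAMap sizes are inclusive, len > 0 */
        .perm = IOMMU_RW,
    };

    /* Allocate an IOVA range and record the GPA->IOVA pair */
    if (vhost_iova_tree_map_alloc_gpa(tree, &map, gpa) != IOVA_OK) {
        return;
    }

    /* Translate the GPA back to the IOVA assigned above */
    needle = (DMAMap) {
        .translated_addr = gpa,
        .size = len - 1,
    };
    found = vhost_iova_tree_find_gpa(tree, &needle);
    assert(found && found->iova == map.iova);

    /* Drop both the IOVA-only and the GPA->IOVA entries */
    vhost_iova_tree_remove_gpa(tree, *found);
}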
@@ -118,8 +118,9 @@ uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq);
void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
const VirtQueueElement *elem, uint32_t len);
int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
- size_t out_num, const struct iovec *in_sg, size_t in_num,
- VirtQueueElement *elem);
+ size_t out_num, const hwaddr *out_addr,
+ const struct iovec *in_sg, size_t in_num,
+ const hwaddr *in_addr, VirtQueueElement *elem);
size_t vhost_svq_poll(VhostShadowVirtqueue *svq, size_t num);
void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
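To illustrate the widened vhost_svq_add() signature, here is a sketch of the two calling conventions it supports. The helper names are hypothetical: the first mirrors vhost_svq_add_element() further down in this patch, the second the control virtqueue path, and both assume the usual QEMU includes.

#include "qemu/osdep.h"
#include "hw/virtio/vhost-shadow-virtqueue.h"

/* Descriptors backed by guest memory: forward the element's GPA arrays */
static int svq_add_guest_element(VhostShadowVirtqueue *svq,
                                 VirtQueueElement *elem)
{
    return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->out_addr,
                         elem->in_sg, elem->in_num, elem->in_addr, elem);
}

/* QEMU-internal buffers: no GPAs, translation falls back to IOVA->HVA */
static int svq_add_qemu_buffers(VhostShadowVirtqueue *svq,
                                const struct iovec *out_sg, size_t out_num,
                                const struct iovec *in_sg, size_t in_num)
{
    return vhost_svq_add(svq, out_sg, out_num, NULL, in_sg, in_num, NULL,
                         NULL);
}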
@@ -40,6 +40,28 @@ typedef struct DMAMap {
} QEMU_PACKED DMAMap;
typedef gboolean (*iova_tree_iterator)(DMAMap *map);
+/**
+ * gpa_tree_new:
+ *
+ * Create a new GPA->IOVA tree.
+ *
+ * Returns: the tree pointer on success, or NULL otherwise.
+ */
+IOVATree *gpa_tree_new(void);
+
+/**
+ * gpa_tree_insert:
+ *
+ * @tree: The GPA->IOVA tree we're inserting the mapping to
+ * @map: The GPA->IOVA mapping to insert
+ *
+ * Inserts a GPA range into the GPA->IOVA tree. If the range overlaps an
+ * existing mapping, IOVA_ERR_OVERLAP is returned.
+ *
+ * Return: 0 if successful, < 0 otherwise.
+ */
+int gpa_tree_insert(IOVATree *tree, const DMAMap *map);
+
/**
* iova_tree_new:
*
@@ -31,6 +31,9 @@ struct VhostIOVATree {
/* Allocated IOVA addresses */
IOVATree *iova_map;
+
+ /* GPA->IOVA address memory maps */
+ IOVATree *gpa_iova_map;
};
/**
@@ -48,6 +51,7 @@ VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last)
tree->iova_taddr_map = iova_tree_new();
tree->iova_map = iova_tree_new();
+ tree->gpa_iova_map = gpa_tree_new();
return tree;
}
@@ -58,6 +62,7 @@ void vhost_iova_tree_delete(VhostIOVATree *iova_tree)
{
iova_tree_destroy(iova_tree->iova_taddr_map);
iova_tree_destroy(iova_tree->iova_map);
+ iova_tree_destroy(iova_tree->gpa_iova_map);
g_free(iova_tree);
}
@@ -122,3 +127,65 @@ void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map)
iova_tree_remove(iova_tree->iova_taddr_map, map);
iova_tree_remove(iova_tree->iova_map, map);
}
+
+/**
+ * Find the IOVA address stored from a guest memory address (GPA)
+ *
+ * @tree: The VhostIOVATree
+ * @map: The map with the guest memory address
+ *
+ * Returns the stored GPA->IOVA mapping, or NULL if not found.
+ */
+const DMAMap *vhost_iova_tree_find_gpa(const VhostIOVATree *tree,
+ const DMAMap *map)
+{
+ return iova_tree_find_iova(tree->gpa_iova_map, map);
+}
+
+/**
+ * Allocate a new IOVA range and add the mapping to the GPA->IOVA tree
+ *
+ * @tree: The VhostIOVATree
+ * @map: The IOVA mapping
+ * @taddr: The translated address (GPA)
+ *
+ * Returns:
+ * - IOVA_OK if the map fits both containers
+ * - IOVA_ERR_INVALID if the map does not make sense (like size overflow)
+ * - IOVA_ERR_NOMEM if the IOVA-only tree cannot allocate more space
+ *
+ * It returns an assigned IOVA in map->iova if the return value is IOVA_OK.
+ */
+int vhost_iova_tree_map_alloc_gpa(VhostIOVATree *tree, DMAMap *map, hwaddr taddr)
+{
+ int ret;
+
+ /* Some vhost devices don't like addr 0. Skip first page */
+ hwaddr iova_first = tree->iova_first ?: qemu_real_host_page_size();
+
+ if (taddr + map->size < taddr || map->perm == IOMMU_NONE) {
+ return IOVA_ERR_INVALID;
+ }
+
+ /* Allocate a node in the IOVA-only tree */
+ ret = iova_tree_alloc_map(tree->iova_map, map, iova_first, tree->iova_last);
+ if (unlikely(ret != IOVA_OK)) {
+ return ret;
+ }
+
+ /* Insert a node in the GPA->IOVA tree */
+ map->translated_addr = taddr;
+ return gpa_tree_insert(tree->gpa_iova_map, map);
+}
+
+/**
+ * Remove existing mappings from the IOVA-only and GPA->IOVA trees
+ *
+ * @iova_tree: The VhostIOVATree
+ * @map: The map to remove
+ */
+void vhost_iova_tree_remove_gpa(VhostIOVATree *iova_tree, DMAMap map)
+{
+ iova_tree_remove(iova_tree->gpa_iova_map, map);
+ iova_tree_remove(iova_tree->iova_map, map);
+}
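The two-step allocation above is what lets callers tell the failure modes apart: map->translated_addr is only set to the GPA once the IOVA-only allocation has succeeded. Below is a sketch of the resulting error handling, mirroring the memory-listener caller later in this patch; the wrapper name is hypothetical, it assumes the caller did not pre-set map->translated_addr to the GPA, and it relies on the includes already present in vhost-iova-tree.c.

static int vhost_iova_tree_map_alloc_gpa_checked(VhostIOVATree *tree,
                                                 DMAMap *map, hwaddr gpa)
{
    int r = vhost_iova_tree_map_alloc_gpa(tree, map, gpa);

    if (unlikely(r != IOVA_OK)) {
        if (map->translated_addr == gpa) {
            /*
             * gpa_tree_insert() failed after the IOVA-only allocation
             * succeeded: roll back the IOVA-only entry (the GPA->IOVA
             * removal inside is a no-op here).
             */
            vhost_iova_tree_remove_gpa(tree, *map);
        }
        /* Otherwise iova_tree_alloc_map() failed and nothing was inserted */
    }
    return r;
}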
@@ -78,24 +78,39 @@ uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
* @vaddr: Translated IOVA addresses
* @iovec: Source qemu's VA addresses
* @num: Length of iovec and minimum length of vaddr
+ * @gpas: Descriptors' GPAs, if backed by guest memory
*/
static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
hwaddr *addrs, const struct iovec *iovec,
- size_t num)
+ size_t num, const hwaddr *gpas)
{
if (num == 0) {
return true;
}
for (size_t i = 0; i < num; ++i) {
- DMAMap needle = {
- .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base,
- .size = iovec[i].iov_len,
- };
Int128 needle_last, map_last;
size_t off;
+ const DMAMap *map;
+ DMAMap needle;
+
+ /* Check if the descriptor is backed by guest memory */
+ if (gpas) {
+ /* Search the GPA->IOVA tree */
+ needle = (DMAMap) {
+ .translated_addr = gpas[i],
+ .size = iovec[i].iov_len,
+ };
+ map = vhost_iova_tree_find_gpa(svq->iova_tree, &needle);
+ } else {
+ /* Search the IOVA->HVA tree */
+ needle = (DMAMap) {
+ .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base,
+ .size = iovec[i].iov_len,
+ };
+ map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
+ }
- const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
/*
* Map cannot be NULL since iova map contains all guest space and
* qemu already has a physical address mapped
@@ -130,6 +145,7 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
* @sg: Cache for hwaddr
* @iovec: The iovec from the guest
* @num: iovec length
+ * @addr: Descriptors' GPAs, if backed by guest memory
* @more_descs: True if more descriptors come in the chain
* @write: True if they are writeable descriptors
*
@@ -137,7 +153,8 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
*/
static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
const struct iovec *iovec, size_t num,
- bool more_descs, bool write)
+ const hwaddr *addr, bool more_descs,
+ bool write)
{
uint16_t i = svq->free_head, last = svq->free_head;
unsigned n;
@@ -149,7 +166,7 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
return true;
}
- ok = vhost_svq_translate_addr(svq, sg, iovec, num);
+ ok = vhost_svq_translate_addr(svq, sg, iovec, num, addr);
if (unlikely(!ok)) {
return false;
}
@@ -174,8 +191,9 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
const struct iovec *out_sg, size_t out_num,
+ const hwaddr *out_addr,
const struct iovec *in_sg, size_t in_num,
- unsigned *head)
+ const hwaddr *in_addr, unsigned *head)
{
unsigned avail_idx;
vring_avail_t *avail = svq->vring.avail;
@@ -191,13 +209,14 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
return false;
}
- ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, in_num > 0,
- false);
+ ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, out_addr,
+ in_num > 0, false);
if (unlikely(!ok)) {
return false;
}
- ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true);
+ ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, in_addr, false,
+ true);
if (unlikely(!ok)) {
return false;
}
@@ -247,8 +266,9 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
* Return -EINVAL if element is invalid, -ENOSPC if dev queue is full
*/
int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
- size_t out_num, const struct iovec *in_sg, size_t in_num,
- VirtQueueElement *elem)
+ size_t out_num, const hwaddr *out_addr,
+ const struct iovec *in_sg, size_t in_num,
+ const hwaddr *in_addr, VirtQueueElement *elem)
{
unsigned qemu_head;
unsigned ndescs = in_num + out_num;
@@ -258,7 +278,8 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
return -ENOSPC;
}
- ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head);
+ ok = vhost_svq_add_split(svq, out_sg, out_num, out_addr, in_sg, in_num,
+ in_addr, &qemu_head);
if (unlikely(!ok)) {
return -EINVAL;
}
@@ -274,8 +295,8 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
static int vhost_svq_add_element(VhostShadowVirtqueue *svq,
VirtQueueElement *elem)
{
- return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->in_sg,
- elem->in_num, elem);
+ return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->out_addr,
+ elem->in_sg, elem->in_num, elem->in_addr, elem);
}
/**
@@ -360,17 +360,17 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
llsize = int128_sub(llend, int128_make64(iova));
if (s->shadow_data) {
int r;
- hwaddr hw_vaddr = (hwaddr)(uintptr_t)vaddr;
+ hwaddr gpa = section->offset_within_address_space;
mem_region.size = int128_get64(llsize) - 1,
mem_region.perm = IOMMU_ACCESS_FLAG(true, section->readonly),
- r = vhost_iova_tree_map_alloc(s->iova_tree, &mem_region, hw_vaddr);
+ r = vhost_iova_tree_map_alloc_gpa(s->iova_tree, &mem_region, gpa);
if (unlikely(r != IOVA_OK)) {
error_report("Can't allocate a mapping (%d)", r);
- if (mem_region.translated_addr == hw_vaddr) {
- error_report("Insertion to IOVA->HVA tree failed");
+ if (mem_region.translated_addr == gpa) {
+ error_report("Insertion to GPA->IOVA tree failed");
/* Remove the mapping from the IOVA-only tree */
goto fail_map;
}
@@ -392,7 +392,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
fail_map:
if (s->shadow_data) {
- vhost_iova_tree_remove(s->iova_tree, mem_region);
+ vhost_iova_tree_remove_gpa(s->iova_tree, mem_region);
}
fail:
@@ -446,21 +446,18 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
if (s->shadow_data) {
const DMAMap *result;
- const void *vaddr = memory_region_get_ram_ptr(section->mr) +
- section->offset_within_region +
- (iova - section->offset_within_address_space);
DMAMap mem_region = {
- .translated_addr = (hwaddr)(uintptr_t)vaddr,
+ .translated_addr = section->offset_within_address_space,
.size = int128_get64(llsize) - 1,
};
- result = vhost_iova_tree_find_iova(s->iova_tree, &mem_region);
+ result = vhost_iova_tree_find_gpa(s->iova_tree, &mem_region);
if (!result) {
/* The memory listener map wasn't mapped */
return;
}
iova = result->iova;
- vhost_iova_tree_remove(s->iova_tree, *result);
+ vhost_iova_tree_remove_gpa(s->iova_tree, *result);
}
vhost_vdpa_iotlb_batch_begin_once(s);
/*
@@ -649,7 +649,7 @@ static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s,
VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
int r;
- r = vhost_svq_add(svq, out_sg, out_num, in_sg, in_num, NULL);
+ r = vhost_svq_add(svq, out_sg, out_num, NULL, in_sg, in_num, NULL, NULL);
if (unlikely(r != 0)) {
if (unlikely(r == -ENOSPC)) {
qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
@@ -257,3 +257,49 @@ void iova_tree_destroy(IOVATree *tree)
g_tree_destroy(tree->tree);
g_free(tree);
}
+
+static int gpa_tree_compare(gconstpointer a, gconstpointer b, gpointer data)
+{
+ const DMAMap *m1 = a, *m2 = b;
+
+ if (m1->translated_addr > m2->translated_addr + m2->size) {
+ return 1;
+ }
+
+ if (m1->translated_addr + m1->size < m2->translated_addr) {
+ return -1;
+ }
+
+ /* Overlapped */
+ return 0;
+}
+
+IOVATree *gpa_tree_new(void)
+{
+ IOVATree *gpa_tree = g_new0(IOVATree, 1);
+
+ gpa_tree->tree = g_tree_new_full(gpa_tree_compare, NULL, g_free, NULL);
+
+ return gpa_tree;
+}
+
+int gpa_tree_insert(IOVATree *tree, const DMAMap *map)
+{
+ DMAMap *new;
+
+ if (map->translated_addr + map->size < map->translated_addr ||
+ map->perm == IOMMU_NONE) {
+ return IOVA_ERR_INVALID;
+ }
+
+ /* We don't allow inserting ranges that overlap with existing ones */
+ if (iova_tree_find(tree, map)) {
+ return IOVA_ERR_OVERLAP;
+ }
+
+ new = g_new0(DMAMap, 1);
+ memcpy(new, map, sizeof(*new));
+ iova_tree_insert_internal(tree->tree, new);
+
+ return IOVA_OK;
+}
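Because the GPA tree keys on translated_addr and DMAMap sizes are inclusive, the comparator's overlap rule can be exercised directly. A hypothetical unit-test-style sketch, not part of the patch, assuming it is built inside the QEMU tree:

#include "qemu/osdep.h"
#include "qemu/iova-tree.h"

static void gpa_tree_overlap_demo(void)
{
    IOVATree *tree = gpa_tree_new();
    /* [0x1000, 0x1fff]: size is inclusive */
    DMAMap a = { .translated_addr = 0x1000, .size = 0xfff, .perm = IOMMU_RW };
    /* [0x1800, 0x27ff] overlaps the tail of the first range */
    DMAMap b = { .translated_addr = 0x1800, .size = 0xfff, .perm = IOMMU_RW };

    g_assert(gpa_tree_insert(tree, &a) == IOVA_OK);
    g_assert(gpa_tree_insert(tree, &b) == IOVA_ERR_OVERLAP);

    iova_tree_destroy(tree);
}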