@@ -85,6 +85,7 @@ struct vfio_group {
atomic_t opened;
wait_queue_head_t container_q;
bool noiommu;
+ unsigned int dev_counter;
struct kvm *kvm;
struct blocking_notifier_head notifier;
};
@@ -555,6 +556,7 @@ struct vfio_device *vfio_group_create_device(struct vfio_group *group,
mutex_lock(&group->device_lock);
list_add(&device->group_next, &group->device_list);
+ group->dev_counter++;
mutex_unlock(&group->device_lock);
return device;
@@ -567,6 +569,7 @@ static void vfio_device_release(struct kref *kref)
struct vfio_group *group = device->group;
list_del(&device->group_next);
+ group->dev_counter--;
mutex_unlock(&group->device_lock);
dev_set_drvdata(device->dev, NULL);
@@ -1945,6 +1948,9 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
if (!group)
return -ENODEV;
+ if (group->dev_counter > 1)
+ return -EINVAL;
+
ret = vfio_group_add_container_user(group);
if (ret)
goto err_pin_pages;
@@ -1952,7 +1958,8 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
container = group->container;
driver = container->iommu_driver;
if (likely(driver && driver->ops->pin_pages))
- ret = driver->ops->pin_pages(container->iommu_data, user_pfn,
+ ret = driver->ops->pin_pages(container->iommu_data,
+ group->iommu_group, user_pfn,
npage, prot, phys_pfn);
else
ret = -ENOTTY;
@@ -2050,8 +2057,8 @@ int vfio_group_pin_pages(struct vfio_group *group,
driver = container->iommu_driver;
if (likely(driver && driver->ops->pin_pages))
ret = driver->ops->pin_pages(container->iommu_data,
- user_iova_pfn, npage,
- prot, phys_pfn);
+ group->iommu_group, user_iova_pfn,
+ npage, prot, phys_pfn);
else
ret = -ENOTTY;
@@ -73,6 +73,7 @@ struct vfio_iommu {
bool v2;
bool nesting;
bool dirty_page_tracking;
+ bool pinned_page_dirty_scope;
};
struct vfio_domain {
@@ -100,6 +101,7 @@ struct vfio_group {
struct iommu_group *iommu_group;
struct list_head next;
bool mdev_group; /* An mdev group */
+ bool pinned_page_dirty_scope;
};
struct vfio_iova {
@@ -143,6 +145,10 @@ struct vfio_regions {
static int put_pfn(unsigned long pfn, int prot);
+static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu,
+ struct iommu_group *iommu_group);
+
+static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu);
/*
* This code handles mapping and unmapping of user data buffers
* into DMA'ble space using the IOMMU
@@ -622,11 +628,13 @@ static int vfio_unpin_page_external(struct vfio_dma *dma, dma_addr_t iova,
}
static int vfio_iommu_type1_pin_pages(void *iommu_data,
+ struct iommu_group *iommu_group,
unsigned long *user_pfn,
int npage, int prot,
unsigned long *phys_pfn)
{
struct vfio_iommu *iommu = iommu_data;
+ struct vfio_group *group;
int i, j, ret;
unsigned long remote_vaddr;
struct vfio_dma *dma;
@@ -699,8 +707,14 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
(iova - dma->iova) >> pgshift, 1);
}
}
-
ret = i;
+
+ group = vfio_iommu_find_iommu_group(iommu, iommu_group);
+ if (!group->pinned_page_dirty_scope) {
+ group->pinned_page_dirty_scope = true;
+ update_pinned_page_dirty_scope(iommu);
+ }
+
goto pin_done;
pin_unwind:
@@ -960,8 +974,9 @@ static void vfio_update_pgsize_bitmap(struct vfio_iommu *iommu)
}
}
-static int update_user_bitmap(u64 __user *bitmap, struct vfio_dma *dma,
- dma_addr_t base_iova, size_t pgsize)
+static int update_user_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
+ struct vfio_dma *dma, dma_addr_t base_iova,
+ size_t pgsize)
{
unsigned long pgshift = __ffs(pgsize);
unsigned long nbits = dma->size >> pgshift;
@@ -970,8 +985,11 @@ static int update_user_bitmap(u64 __user *bitmap, struct vfio_dma *dma,
unsigned long shift = bit_offset % BITS_PER_LONG;
unsigned long leftover;
- /* mark all pages dirty if all pages are pinned and mapped. */
- if (dma->iommu_mapped)
+ /*
+ * mark all pages dirty if any IOMMU capable device is not able
+ * to report dirty pages and all pages are pinned and mapped.
+ */
+ if (!iommu->pinned_page_dirty_scope && dma->iommu_mapped)
bitmap_set(dma->bitmap, 0, nbits);
if (shift) {
@@ -1024,7 +1042,7 @@ static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
if (dma->iova > iova + size - 1)
break;
- ret = update_user_bitmap(bitmap, dma, iova, pgsize);
+ ret = update_user_bitmap(bitmap, iommu, dma, iova, pgsize);
if (ret)
return ret;
@@ -1169,7 +1187,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
}
if (unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) {
- ret = update_user_bitmap(bitmap->data, dma,
+ ret = update_user_bitmap(bitmap->data, iommu, dma,
unmap->iova, pgsize);
if (ret)
break;
@@ -1521,6 +1539,51 @@ static struct vfio_group *find_iommu_group(struct vfio_domain *domain,
return NULL;
}
+static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu,
+ struct iommu_group *iommu_group)
+{
+ struct vfio_domain *domain;
+ struct vfio_group *group = NULL;
+
+ list_for_each_entry(domain, &iommu->domain_list, next) {
+ group = find_iommu_group(domain, iommu_group);
+ if (group)
+ return group;
+ }
+
+ if (iommu->external_domain)
+ group = find_iommu_group(iommu->external_domain, iommu_group);
+
+ return group;
+}
+
+static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu)
+{
+ struct vfio_domain *domain;
+ struct vfio_group *group;
+
+ list_for_each_entry(domain, &iommu->domain_list, next) {
+ list_for_each_entry(group, &domain->group_list, next) {
+ if (!group->pinned_page_dirty_scope) {
+ iommu->pinned_page_dirty_scope = false;
+ return;
+ }
+ }
+ }
+
+ if (iommu->external_domain) {
+ domain = iommu->external_domain;
+ list_for_each_entry(group, &domain->group_list, next) {
+ if (!group->pinned_page_dirty_scope) {
+ iommu->pinned_page_dirty_scope = false;
+ return;
+ }
+ }
+ }
+
+ iommu->pinned_page_dirty_scope = true;
+}
+
static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions,
phys_addr_t *base)
{
@@ -1928,6 +1991,16 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
list_add(&group->next,
&iommu->external_domain->group_list);
+ /*
+ * Non-iommu backed group cannot dirty memory directly,
+ * it can only use interfaces that provide dirty
+ * tracking.
+ * The iommu scope can only be promoted with the
+ * addition of a dirty tracking group.
+ */
+ group->pinned_page_dirty_scope = true;
+ if (!iommu->pinned_page_dirty_scope)
+ update_pinned_page_dirty_scope(iommu);
mutex_unlock(&iommu->lock);
return 0;
@@ -2051,6 +2124,13 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
done:
/* Delete the old one and insert new iova list */
vfio_iommu_iova_insert_copy(iommu, &iova_copy);
+
+ /*
+ * An iommu backed group can dirty memory directly and therefore
+ * demotes the iommu scope until it declares itself dirty tracking
+ * capable via the page pinning interface.
+ */
+ iommu->pinned_page_dirty_scope = false;
mutex_unlock(&iommu->lock);
vfio_iommu_resv_free(&group_resv_regions);
@@ -2203,6 +2283,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
struct vfio_iommu *iommu = iommu_data;
struct vfio_domain *domain;
struct vfio_group *group;
+ bool update_dirty_scope = false;
LIST_HEAD(iova_copy);
mutex_lock(&iommu->lock);
@@ -2210,6 +2291,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
if (iommu->external_domain) {
group = find_iommu_group(iommu->external_domain, iommu_group);
if (group) {
+ update_dirty_scope = !group->pinned_page_dirty_scope;
list_del(&group->next);
kfree(group);
@@ -2239,6 +2321,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
continue;
vfio_iommu_detach_group(domain, group);
+ update_dirty_scope = !group->pinned_page_dirty_scope;
list_del(&group->next);
kfree(group);
/*
@@ -2270,6 +2353,12 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
vfio_iommu_iova_free(&iova_copy);
detach_group_done:
+ /*
+ * Removal of a group without dirty tracking may allow the iommu scope
+ * to be promoted.
+ */
+ if (update_dirty_scope)
+ update_pinned_page_dirty_scope(iommu);
mutex_unlock(&iommu->lock);
}
@@ -76,7 +76,9 @@ struct vfio_iommu_driver_ops {
struct iommu_group *group);
void (*detach_group)(void *iommu_data,
struct iommu_group *group);
- int (*pin_pages)(void *iommu_data, unsigned long *user_pfn,
+ int (*pin_pages)(void *iommu_data,
+ struct iommu_group *group,
+ unsigned long *user_pfn,
int npage, int prot,
unsigned long *phys_pfn);
int (*unpin_pages)(void *iommu_data,