@@ -85,6 +85,7 @@ struct vfio_group {
atomic_t opened;
wait_queue_head_t container_q;
bool noiommu;
+ unsigned int dev_counter;
struct kvm *kvm;
struct blocking_notifier_head notifier;
};
@@ -555,6 +556,7 @@ struct vfio_device *vfio_group_create_device(struct vfio_group *group,
mutex_lock(&group->device_lock);
list_add(&device->group_next, &group->device_list);
+ group->dev_counter++;
mutex_unlock(&group->device_lock);
return device;
@@ -567,6 +569,7 @@ static void vfio_device_release(struct kref *kref)
struct vfio_group *group = device->group;
list_del(&device->group_next);
+ group->dev_counter--;
mutex_unlock(&group->device_lock);
dev_set_drvdata(device->dev, NULL);
@@ -1933,6 +1936,9 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
if (!group)
return -ENODEV;
+ if (group->dev_counter > 1)
+ return -EINVAL;
+
ret = vfio_group_add_container_user(group);
if (ret)
goto err_pin_pages;
@@ -1940,7 +1946,8 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
container = group->container;
driver = container->iommu_driver;
if (likely(driver && driver->ops->pin_pages))
- ret = driver->ops->pin_pages(container->iommu_data, user_pfn,
+ ret = driver->ops->pin_pages(container->iommu_data,
+ group->iommu_group, user_pfn,
npage, prot, phys_pfn);
else
ret = -ENOTTY;
@@ -2038,8 +2045,8 @@ int vfio_group_pin_pages(struct vfio_group *group,
driver = container->iommu_driver;
if (likely(driver && driver->ops->pin_pages))
ret = driver->ops->pin_pages(container->iommu_data,
- user_iova_pfn, npage,
- prot, phys_pfn);
+ group->iommu_group, user_iova_pfn,
+ npage, prot, phys_pfn);
else
ret = -ENOTTY;
@@ -72,6 +72,7 @@ struct vfio_iommu {
bool v2;
bool nesting;
bool dirty_page_tracking;
+ bool pinned_page_dirty_scope;
};
struct vfio_domain {
@@ -99,6 +100,7 @@ struct vfio_group {
struct iommu_group *iommu_group;
struct list_head next;
bool mdev_group; /* An mdev group */
+ bool pinned_page_dirty_scope;
};
struct vfio_iova {
@@ -143,6 +145,10 @@ struct vfio_regions {
static int put_pfn(unsigned long pfn, int prot);
static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu);
+static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu,
+ struct iommu_group *iommu_group);
+
+static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu);
/*
* This code handles mapping and unmapping of user data buffers
* into DMA'ble space using the IOMMU
@@ -579,11 +585,13 @@ static int vfio_unpin_page_external(struct vfio_dma *dma, dma_addr_t iova,
}
static int vfio_iommu_type1_pin_pages(void *iommu_data,
+ struct iommu_group *iommu_group,
unsigned long *user_pfn,
int npage, int prot,
unsigned long *phys_pfn)
{
struct vfio_iommu *iommu = iommu_data;
+ struct vfio_group *group;
int i, j, ret;
unsigned long remote_vaddr;
struct vfio_dma *dma;
@@ -653,8 +661,14 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
(vpfn->iova - dma->iova) >> pgshift, 1);
}
}
-
ret = i;
+
+ group = vfio_iommu_find_iommu_group(iommu, iommu_group);
+ if (!group->pinned_page_dirty_scope) {
+ group->pinned_page_dirty_scope = true;
+ update_pinned_page_dirty_scope(iommu);
+ }
+
goto pin_done;
pin_unwind:
@@ -936,8 +950,11 @@ static int vfio_iova_dirty_bitmap(struct vfio_iommu *iommu, dma_addr_t iova,
npages = dma->size >> pgshift;
bitmap_size = DIRTY_BITMAP_BYTES(npages);
- /* mark all pages dirty if all pages are pinned and mapped. */
- if (dma->iommu_mapped)
+ /*
+ * mark all pages dirty if any IOMMU capable device is not able
+ * to report dirty pages and all pages are pinned and mapped.
+ */
+ if (!iommu->pinned_page_dirty_scope && dma->iommu_mapped)
bitmap_set(dma->bitmap, 0, npages);
if (copy_to_user((void __user *)bitmap, dma->bitmap, bitmap_size))
@@ -1421,6 +1438,51 @@ static struct vfio_group *find_iommu_group(struct vfio_domain *domain,
return NULL;
}
+static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu,
+ struct iommu_group *iommu_group)
+{
+ struct vfio_domain *domain;
+ struct vfio_group *group = NULL;
+
+ list_for_each_entry(domain, &iommu->domain_list, next) {
+ group = find_iommu_group(domain, iommu_group);
+ if (group)
+ return group;
+ }
+
+ if (iommu->external_domain)
+ group = find_iommu_group(iommu->external_domain, iommu_group);
+
+ return group;
+}
+
+static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu)
+{
+ struct vfio_domain *domain;
+ struct vfio_group *group;
+
+ list_for_each_entry(domain, &iommu->domain_list, next) {
+ list_for_each_entry(group, &domain->group_list, next) {
+ if (!group->pinned_page_dirty_scope) {
+ iommu->pinned_page_dirty_scope = false;
+ return;
+ }
+ }
+ }
+
+ if (iommu->external_domain) {
+ domain = iommu->external_domain;
+ list_for_each_entry(group, &domain->group_list, next) {
+ if (!group->pinned_page_dirty_scope) {
+ iommu->pinned_page_dirty_scope = false;
+ return;
+ }
+ }
+ }
+
+ iommu->pinned_page_dirty_scope = true;
+}
+
static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions,
phys_addr_t *base)
{
@@ -1827,6 +1889,16 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
list_add(&group->next,
&iommu->external_domain->group_list);
+ /*
+ * Non-iommu backed group cannot dirty memory directly,
+ * it can only use interfaces that provide dirty
+ * tracking.
+ * The iommu scope can only be promoted with the
+ * addition of a dirty tracking group.
+ */
+ group->pinned_page_dirty_scope = true;
+ if (!iommu->pinned_page_dirty_scope)
+ update_pinned_page_dirty_scope(iommu);
mutex_unlock(&iommu->lock);
return 0;
@@ -1949,6 +2021,13 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
done:
/* Delete the old one and insert new iova list */
vfio_iommu_iova_insert_copy(iommu, &iova_copy);
+
+ /*
+ * An iommu backed group can dirty memory directly and therefore
+ * demotes the iommu scope until it declares itself dirty tracking
+ * capable via the page pinning interface.
+ */
+ iommu->pinned_page_dirty_scope = false;
mutex_unlock(&iommu->lock);
vfio_iommu_resv_free(&group_resv_regions);
@@ -2101,6 +2180,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
struct vfio_iommu *iommu = iommu_data;
struct vfio_domain *domain;
struct vfio_group *group;
+ bool update_dirty_scope = false;
LIST_HEAD(iova_copy);
mutex_lock(&iommu->lock);
@@ -2108,6 +2188,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
if (iommu->external_domain) {
group = find_iommu_group(iommu->external_domain, iommu_group);
if (group) {
+ update_dirty_scope = !group->pinned_page_dirty_scope;
list_del(&group->next);
kfree(group);
@@ -2137,6 +2218,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
continue;
vfio_iommu_detach_group(domain, group);
+ update_dirty_scope = !group->pinned_page_dirty_scope;
list_del(&group->next);
kfree(group);
/*
@@ -2167,6 +2249,12 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
vfio_iommu_iova_free(&iova_copy);
detach_group_done:
+ /*
+ * Removal of a group without dirty tracking may allow the iommu scope
+ * to be promoted.
+ */
+ if (update_dirty_scope)
+ update_pinned_page_dirty_scope(iommu);
mutex_unlock(&iommu->lock);
}
@@ -72,7 +72,9 @@ struct vfio_iommu_driver_ops {
struct iommu_group *group);
void (*detach_group)(void *iommu_data,
struct iommu_group *group);
- int (*pin_pages)(void *iommu_data, unsigned long *user_pfn,
+ int (*pin_pages)(void *iommu_data,
+ struct iommu_group *group,
+ unsigned long *user_pfn,
int npage, int prot,
unsigned long *phys_pfn);
int (*unpin_pages)(void *iommu_data,