@@ -383,35 +383,16 @@ int amd_iommu_map_page(struct domain *d,
unsigned int flags, unsigned int *flush_flags)
{
struct domain_iommu *hd = dom_iommu(d);
- int rc;
unsigned long pt_mfn[7];
memset(pt_mfn, 0, sizeof(pt_mfn));
spin_lock(&hd->arch.mapping_lock);
- rc = amd_iommu_alloc_root(hd);
- if ( rc )
+ if ( !hd->arch.root_table )
{
spin_unlock(&hd->arch.mapping_lock);
- AMD_IOMMU_DEBUG("Root table alloc failed, dfn = %"PRI_dfn"\n",
- dfn_x(dfn));
- domain_crash(d);
- return rc;
- }
-
- /* Since HVM domain is initialized with 2 level IO page table,
- * we might need a deeper page table for wider dfn now */
- if ( is_hvm_domain(d) )
- {
- if ( update_paging_mode(d, dfn_x(dfn)) )
- {
- spin_unlock(&hd->arch.mapping_lock);
- AMD_IOMMU_DEBUG("Update page mode failed dfn = %"PRI_dfn"\n",
- dfn_x(dfn));
- domain_crash(d);
- return -EFAULT;
- }
+ return -ENODATA;
}
if ( iommu_pde_from_dfn(d, dfn_x(dfn), pt_mfn, true) || (pt_mfn[1] == 0) )
@@ -468,6 +449,48 @@ int amd_iommu_unmap_page(struct domain *
return 0;
}
+
+int amd_iommu_notify_dfn(struct domain *d, dfn_t dfn)
+{
+ struct domain_iommu *hd = dom_iommu(d);
+ int rc;
+
+ ASSERT(is_hvm_domain(d));
+
+ /*
+ * An HVM domain starts out with a 2 level IO page table, so a wider dfn
+ * may need a deeper one. update_paging_mode() wants pcidevs_lock held.
+ */
+ pcidevs_lock();
+ spin_lock(&hd->arch.mapping_lock);
+
+ rc = amd_iommu_alloc_root(hd);
+ if ( rc )
+ {
+ spin_unlock(&hd->arch.mapping_lock);
+ pcidevs_unlock();
+ AMD_IOMMU_DEBUG("Root table alloc failed, dfn = %"PRI_dfn" (rc %d)\n",
+ dfn_x(dfn), rc);
+ domain_crash(d);
+ return rc;
+ }
+
+ rc = update_paging_mode(d, dfn_x(dfn));
+ if ( rc )
+ {
+ spin_unlock(&hd->arch.mapping_lock);
+ pcidevs_unlock();
+ AMD_IOMMU_DEBUG("Update paging mode failed dfn %"PRI_dfn" (rc %d)\n",
+ dfn_x(dfn), rc);
+ domain_crash(d);
+ return rc;
+ }
+
+ spin_unlock(&hd->arch.mapping_lock);
+ pcidevs_unlock();
+
+ return 0;
+}
static unsigned long flush_count(unsigned long dfn, unsigned int page_count,
unsigned int order)
@@ -628,6 +628,7 @@ static const struct iommu_ops __initcons
.teardown = amd_iommu_domain_destroy,
.map_page = amd_iommu_map_page,
.unmap_page = amd_iommu_unmap_page,
+ .notify_dfn = amd_iommu_notify_dfn,
.iotlb_flush = amd_iommu_flush_iotlb_pages,
.iotlb_flush_all = amd_iommu_flush_iotlb_all,
.free_page_table = deallocate_page_table,
@@ -61,6 +61,7 @@ int __must_check amd_iommu_map_page(stru
int __must_check amd_iommu_unmap_page(struct domain *d, dfn_t dfn,
unsigned int *flush_flags);
int __must_check amd_iommu_alloc_root(struct domain_iommu *hd);
+int __must_check amd_iommu_notify_dfn(struct domain *d, dfn_t dfn);
int amd_iommu_reserve_domain_unity_map(struct domain *domain,
paddr_t phys_addr, unsigned long size,
int iw, int ir);
update_paging_mode() expects to be invoked with the PCI devices lock held. Because the check occurs only when the mode actually needs updating, the violation of this rule by the majority of callers went unnoticed until per-domain IOMMU setup was changed to do away with on-demand creation of IOMMU page tables.

Acquiring the necessary lock in amd_iommu_map_page() or intermediate layers in generic IOMMU code is not possible - we'd risk all sorts of lock order violations. Hence the call to update_paging_mode() gets pulled out of the function, to be invoked instead from the new notify_dfn() hook, where no potentially conflicting locks are being held by the callers.

Similarly the call to amd_iommu_alloc_root() gets pulled out - now that we receive notification of all DFN range increases, there's no longer any need to do this check when actually mapping a page.

Note that this ought to result in a small performance improvement as well: The hook often gets invoked just once for larger blocks of pages, so rather than going through amd_iommu_alloc_root() and update_paging_mode() once per page, we may now invoke it just once per batch.

Reported-by: Sander Eikelenboom <linux@eikelenboom.it>
Signed-off-by: Jan Beulich <jbeulich@suse.com>