@@ -784,18 +784,37 @@ static int __must_check cf_check iommu_f
return iommu_flush_iotlb(d, INVALID_DFN, 0, 0);
}
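+/*
+ * Queue the hierarchy of no longer used page tables rooted at @mfn for
+ * freeing: entries which are present but not superpages reference lower
+ * level tables, which get queued recursively first.
+ */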
+static void queue_free_pt(struct domain_iommu *hd, mfn_t mfn, unsigned int level)
+{
+ if ( level > 1 )
+ {
+ struct dma_pte *pt = map_domain_page(mfn);
+ unsigned int i;
+
+ for ( i = 0; i < PTE_NUM; ++i )
+ if ( dma_pte_present(pt[i]) && !dma_pte_superpage(pt[i]) )
+ queue_free_pt(hd, maddr_to_mfn(dma_pte_addr(pt[i])),
+ level - 1);
+
+ unmap_domain_page(pt);
+ }
+
+ iommu_queue_free_pgtable(hd, mfn_to_page(mfn));
+}
+
/* clear one page's page table */
static int dma_pte_clear_one(struct domain *domain, daddr_t addr,
unsigned int order,
unsigned int *flush_flags)
{
struct domain_iommu *hd = dom_iommu(domain);
- struct dma_pte *page = NULL, *pte = NULL;
+ struct dma_pte *page = NULL, *pte = NULL, old;
u64 pg_maddr;
+ unsigned int level = (order / LEVEL_STRIDE) + 1;
spin_lock(&hd->arch.mapping_lock);
- /* get last level pte */
- pg_maddr = addr_to_dma_page_maddr(domain, addr, 1, flush_flags, false);
+ /* get target level pte */
+ pg_maddr = addr_to_dma_page_maddr(domain, addr, level, flush_flags, false);
if ( pg_maddr < PAGE_SIZE )
{
spin_unlock(&hd->arch.mapping_lock);
@@ -803,7 +822,7 @@ static int dma_pte_clear_one(struct doma
}
page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
- pte = page + address_level_offset(addr, 1);
+ pte = &page[address_level_offset(addr, level)];
if ( !dma_pte_present(*pte) )
{
@@ -812,14 +831,20 @@ static int dma_pte_clear_one(struct doma
return 0;
}
+ old = *pte;
dma_clear_pte(*pte);
- *flush_flags |= IOMMU_FLUSHF_modified;
spin_unlock(&hd->arch.mapping_lock);
iommu_sync_cache(pte, sizeof(struct dma_pte));
unmap_vtd_domain_page(page);
+ *flush_flags |= IOMMU_FLUSHF_modified;
+
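+    /*
+     * When clearing a non-leaf (non-superpage) entry at an elevated level,
+     * the subtree of page tables it referenced is no longer reachable:
+     * queue it for freeing.
+     */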
+ if ( order && !dma_pte_superpage(old) )
+ queue_free_pt(hd, maddr_to_mfn(dma_pte_addr(old)),
+ order / LEVEL_STRIDE);
+
return 0;
}
@@ -2097,8 +2122,12 @@ static int __must_check cf_check intel_i
struct domain_iommu *hd = dom_iommu(d);
struct dma_pte *page, *pte, old, new = {};
u64 pg_maddr;
+ unsigned int level = (IOMMUF_order(flags) / LEVEL_STRIDE) + 1;
int rc = 0;
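+    /* Only orders matching a supported page size may be passed in. */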
+ ASSERT((hd->platform_ops->page_sizes >> IOMMUF_order(flags)) &
+ PAGE_SIZE_4K);
+
/* Do nothing if VT-d shares EPT page table */
if ( iommu_use_hap_pt(d) )
return 0;
@@ -2121,7 +2150,7 @@ static int __must_check cf_check intel_i
return 0;
}
- pg_maddr = addr_to_dma_page_maddr(d, dfn_to_daddr(dfn), 1, flush_flags,
+ pg_maddr = addr_to_dma_page_maddr(d, dfn_to_daddr(dfn), level, flush_flags,
true);
if ( pg_maddr < PAGE_SIZE )
{
@@ -2130,13 +2159,15 @@ static int __must_check cf_check intel_i
}
page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
- pte = &page[dfn_x(dfn) & LEVEL_MASK];
+ pte = &page[address_level_offset(dfn_to_daddr(dfn), level)];
old = *pte;
dma_set_pte_addr(new, mfn_to_maddr(mfn));
dma_set_pte_prot(new,
((flags & IOMMUF_readable) ? DMA_PTE_READ : 0) |
((flags & IOMMUF_writable) ? DMA_PTE_WRITE : 0));
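+    /* A non-zero order means a leaf at an elevated level, i.e. a superpage. */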
+ if ( IOMMUF_order(flags) )
+ dma_set_pte_superpage(new);
/* Set the SNP on leaf page table if Snoop Control available */
if ( iommu_snoop )
@@ -2157,14 +2188,26 @@ static int __must_check cf_check intel_i
*flush_flags |= IOMMU_FLUSHF_added;
if ( dma_pte_present(old) )
+ {
*flush_flags |= IOMMU_FLUSHF_modified;
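+
+        /*
+         * A superpage replacing a present non-superpage entry makes the
+         * subtree of page tables the old entry referenced unreachable:
+         * queue it for freeing.
+         */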
+ if ( IOMMUF_order(flags) && !dma_pte_superpage(old) )
+ queue_free_pt(hd, maddr_to_mfn(dma_pte_addr(old)),
+ IOMMUF_order(flags) / LEVEL_STRIDE);
+ }
+
return rc;
}
static int __must_check cf_check intel_iommu_unmap_page(
struct domain *d, dfn_t dfn, unsigned int order, unsigned int *flush_flags)
{
+ /*
+ * While really we could unmap at any granularity, for now we assume unmaps
+ * are issued by common code only at the same granularity as maps.
+ */
+ ASSERT((dom_iommu(d)->platform_ops->page_sizes >> order) & PAGE_SIZE_4K);
+
/* Do nothing if VT-d shares EPT page table */
if ( iommu_use_hap_pt(d) )
return 0;
@@ -2519,6 +2562,7 @@ static int __init cf_check vtd_setup(voi
{
struct acpi_drhd_unit *drhd;
struct vtd_iommu *iommu;
+ unsigned int large_sizes = PAGE_SIZE_2M | PAGE_SIZE_1G;
int ret;
bool reg_inval_supported = true;
@@ -2561,6 +2605,11 @@ static int __init cf_check vtd_setup(voi
cap_sps_2mb(iommu->cap) ? ", 2MB" : "",
cap_sps_1gb(iommu->cap) ? ", 1GB" : "");
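+        /* Restrict the superpage sizes to what this IOMMU supports. */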
+ if ( !cap_sps_2mb(iommu->cap) )
+ large_sizes &= ~PAGE_SIZE_2M;
+ if ( !cap_sps_1gb(iommu->cap) )
+ large_sizes &= ~PAGE_SIZE_1G;
+
#ifndef iommu_snoop
if ( iommu_snoop && !ecap_snp_ctl(iommu->ecap) )
iommu_snoop = false;
@@ -2632,6 +2681,9 @@ static int __init cf_check vtd_setup(voi
if ( ret )
goto error;
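+    /*
+     * All IOMMUs have been checked at this point: advertise, on top of the
+     * base 4K size, the superpage sizes supported by every one of them.
+     */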
+ ASSERT(iommu_ops.page_sizes == PAGE_SIZE_4K);
+ iommu_ops.page_sizes |= large_sizes;
+
register_keyhandler('V', vtd_dump_iommu_info, "dump iommu info", 1);
return 0;
@@ -2964,7 +3016,7 @@ static void vtd_dump_page_table_level(pa
continue;
address = gpa + offset_level_address(i, level);
- if ( next_level >= 1 )
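+        /* Recurse only into entries referencing a lower level table. */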
+ if ( next_level && !dma_pte_superpage(*pte) )
vtd_dump_page_table_level(dma_pte_addr(*pte), next_level,
address, indent + 1);
else