Message ID | 20210209152816.15792-6-julien@xen.org (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | xen/iommu: Collection of bug fixes for IOMMU teardown | expand |
> -----Original Message----- > From: Julien Grall <julien@xen.org> > Sent: 09 February 2021 15:28 > To: xen-devel@lists.xenproject.org > Cc: hongyxia@amazon.co.uk; iwj@xenproject.org; Julien Grall <jgrall@amazon.com>; Jan Beulich > <jbeulich@suse.com>; Andrew Cooper <andrew.cooper3@citrix.com>; Kevin Tian <kevin.tian@intel.com>; > Paul Durrant <paul@xen.org> > Subject: [for-4.15][PATCH v2 5/5] xen/iommu: x86: Clear the root page-table before freeing the page- > tables > > From: Julien Grall <jgrall@amazon.com> > > The new per-domain IOMMU page-table allocator will now free the > page-tables when domain's resources are relinquished. However, the root > page-table (i.e. hd->arch.pg_maddr) will not be cleared. > > Xen may access the IOMMU page-tables afterwards at least in the case of > PV domain: > > (XEN) Xen call trace: > (XEN) [<ffff82d04025b4b2>] R iommu.c#addr_to_dma_page_maddr+0x12e/0x1d8 > (XEN) [<ffff82d04025b695>] F iommu.c#intel_iommu_unmap_page+0x5d/0xf8 > (XEN) [<ffff82d0402695f3>] F iommu_unmap+0x9c/0x129 > (XEN) [<ffff82d0402696a6>] F iommu_legacy_unmap+0x26/0x63 > (XEN) [<ffff82d04033c5c7>] F mm.c#cleanup_page_mappings+0x139/0x144 > (XEN) [<ffff82d04033c61d>] F put_page+0x4b/0xb3 > (XEN) [<ffff82d04033c87f>] F put_page_from_l1e+0x136/0x13b > (XEN) [<ffff82d04033cada>] F devalidate_page+0x256/0x8dc > (XEN) [<ffff82d04033d396>] F mm.c#_put_page_type+0x236/0x47e > (XEN) [<ffff82d04033d64d>] F mm.c#put_pt_page+0x6f/0x80 > (XEN) [<ffff82d04033d8d6>] F mm.c#put_page_from_l2e+0x8a/0xcf > (XEN) [<ffff82d04033cc27>] F devalidate_page+0x3a3/0x8dc > (XEN) [<ffff82d04033d396>] F mm.c#_put_page_type+0x236/0x47e > (XEN) [<ffff82d04033d64d>] F mm.c#put_pt_page+0x6f/0x80 > (XEN) [<ffff82d04033d807>] F mm.c#put_page_from_l3e+0x8a/0xcf > (XEN) [<ffff82d04033cdf0>] F devalidate_page+0x56c/0x8dc > (XEN) [<ffff82d04033d396>] F mm.c#_put_page_type+0x236/0x47e > (XEN) [<ffff82d04033d64d>] F mm.c#put_pt_page+0x6f/0x80 > (XEN) [<ffff82d04033d6c7>] F 
mm.c#put_page_from_l4e+0x69/0x6d > (XEN) [<ffff82d04033cf24>] F devalidate_page+0x6a0/0x8dc > (XEN) [<ffff82d04033d396>] F mm.c#_put_page_type+0x236/0x47e > (XEN) [<ffff82d04033d92e>] F put_page_type_preemptible+0x13/0x15 > (XEN) [<ffff82d04032598a>] F domain.c#relinquish_memory+0x1ff/0x4e9 > (XEN) [<ffff82d0403295f2>] F domain_relinquish_resources+0x2b6/0x36a > (XEN) [<ffff82d040205cdf>] F domain_kill+0xb8/0x141 > (XEN) [<ffff82d040236cac>] F do_domctl+0xb6f/0x18e5 > (XEN) [<ffff82d04031d098>] F pv_hypercall+0x2f0/0x55f > (XEN) [<ffff82d04039b432>] F lstar_enter+0x112/0x120 > > This will result in a use-after-free and possibly a host crash or > memory corruption. > > Freeing the page-tables further down in domain_relinquish_resources() > would not work because pages may not be released until later if another > domain holds a reference on them. > > Once all the PCI devices have been de-assigned, it is actually pointless > to access or modify the IOMMU page-tables. So we can simply clear the root > page-table address. 
> > Fixes: 3eef6d07d722 ("x86/iommu: convert VT-d code to use new page table allocator") > Signed-off-by: Julien Grall <jgrall@amazon.com> > > --- > Changes in v2: > - Introduce clear_root_pgtable() > - Move the patch later in the series > --- > xen/drivers/passthrough/amd/pci_amd_iommu.c | 12 +++++++++++- > xen/drivers/passthrough/vtd/iommu.c | 12 +++++++++++- > xen/drivers/passthrough/x86/iommu.c | 6 ++++++ > xen/include/xen/iommu.h | 1 + > 4 files changed, 29 insertions(+), 2 deletions(-) > > diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c > index 42b5a5a9bec4..81add0ba26b4 100644 > --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c > +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c > @@ -381,9 +381,18 @@ static int amd_iommu_assign_device(struct domain *d, u8 devfn, > return reassign_device(pdev->domain, d, devfn, pdev); > } > > +static void iommu_clear_root_pgtable(struct domain *d) > +{ > + struct domain_iommu *hd = dom_iommu(d); > + > + spin_lock(&hd->arch.mapping_lock); > + hd->arch.amd.root_table = NULL; > + spin_unlock(&hd->arch.mapping_lock); > +} > + > static void amd_iommu_domain_destroy(struct domain *d) > { > - dom_iommu(d)->arch.amd.root_table = NULL; > + ASSERT(!dom_iommu(d)->arch.amd.root_table); > } > > static int amd_iommu_add_device(u8 devfn, struct pci_dev *pdev) > @@ -565,6 +574,7 @@ static const struct iommu_ops __initconstrel _iommu_ops = { > .remove_device = amd_iommu_remove_device, > .assign_device = amd_iommu_assign_device, > .teardown = amd_iommu_domain_destroy, > + .clear_root_pgtable = iommu_clear_root_pgtable, > .map_page = amd_iommu_map_page, > .unmap_page = amd_iommu_unmap_page, > .iotlb_flush = amd_iommu_flush_iotlb_pages, > diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c > index d136fe36883b..e1871f6c2bc1 100644 > --- a/xen/drivers/passthrough/vtd/iommu.c > +++ b/xen/drivers/passthrough/vtd/iommu.c > @@ -1726,6 +1726,15 @@ out: > return 
ret; > } > > +static void iommu_clear_root_pgtable(struct domain *d) > +{ > + struct domain_iommu *hd = dom_iommu(d); > + > + spin_lock(&hd->arch.mapping_lock); > + hd->arch.vtd.pgd_maddr = 0; > + spin_unlock(&hd->arch.mapping_lock); > +} > + > static void iommu_domain_teardown(struct domain *d) > { > struct domain_iommu *hd = dom_iommu(d); > @@ -1740,7 +1749,7 @@ static void iommu_domain_teardown(struct domain *d) > xfree(mrmrr); > } > > - hd->arch.vtd.pgd_maddr = 0; > + ASSERT(!hd->arch.vtd.pgd_maddr); > } > > static int __must_check intel_iommu_map_page(struct domain *d, dfn_t dfn, > @@ -2719,6 +2728,7 @@ static struct iommu_ops __initdata vtd_ops = { > .remove_device = intel_iommu_remove_device, > .assign_device = intel_iommu_assign_device, > .teardown = iommu_domain_teardown, > + .clear_root_pgtable = iommu_clear_root_pgtable, > .map_page = intel_iommu_map_page, > .unmap_page = intel_iommu_unmap_page, > .lookup_page = intel_iommu_lookup_page, > diff --git a/xen/drivers/passthrough/x86/iommu.c b/xen/drivers/passthrough/x86/iommu.c > index 82d770107a47..d3cdec6ee83f 100644 > --- a/xen/drivers/passthrough/x86/iommu.c > +++ b/xen/drivers/passthrough/x86/iommu.c > @@ -280,6 +280,12 @@ int iommu_free_pgtables(struct domain *d) > /* After this barrier no new page allocations can occur. */ > spin_barrier(&hd->arch.pgtables.lock); > > + /* > + * Pages will be moved to the free list in a bit. So we want to 'in a bit' sounds a little odd. I think you could just say 'below'. The code looks fine though so... Reviewed-by: Paul Durrant <paul@xen.org> > + * clear the root page-table to avoid any potential use after-free. 
> + */ > + hd->platform_ops->clear_root_pgtable(d); > + > while ( (pg = page_list_remove_head(&hd->arch.pgtables.list)) ) > { > free_domheap_page(pg); > diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h > index 863a68fe1622..d59ed7cbad43 100644 > --- a/xen/include/xen/iommu.h > +++ b/xen/include/xen/iommu.h > @@ -272,6 +272,7 @@ struct iommu_ops { > > int (*adjust_irq_affinities)(void); > void (*sync_cache)(const void *addr, unsigned int size); > + void (*clear_root_pgtable)(struct domain *d); > #endif /* CONFIG_X86 */ > > int __must_check (*suspend)(void); > -- > 2.17.1
> From: Julien Grall <julien@xen.org> > Sent: Tuesday, February 9, 2021 11:28 PM > > From: Julien Grall <jgrall@amazon.com> > > The new per-domain IOMMU page-table allocator will now free the > page-tables when domain's resources are relinquished. However, the root > page-table (i.e. hd->arch.pg_maddr) will not be cleared. It's the pointer not the table itself. > > Xen may access the IOMMU page-tables afterwards at least in the case of > PV domain: > > (XEN) Xen call trace: > (XEN) [<ffff82d04025b4b2>] R > iommu.c#addr_to_dma_page_maddr+0x12e/0x1d8 > (XEN) [<ffff82d04025b695>] F > iommu.c#intel_iommu_unmap_page+0x5d/0xf8 > (XEN) [<ffff82d0402695f3>] F iommu_unmap+0x9c/0x129 > (XEN) [<ffff82d0402696a6>] F iommu_legacy_unmap+0x26/0x63 > (XEN) [<ffff82d04033c5c7>] F mm.c#cleanup_page_mappings+0x139/0x144 > (XEN) [<ffff82d04033c61d>] F put_page+0x4b/0xb3 > (XEN) [<ffff82d04033c87f>] F put_page_from_l1e+0x136/0x13b > (XEN) [<ffff82d04033cada>] F devalidate_page+0x256/0x8dc > (XEN) [<ffff82d04033d396>] F mm.c#_put_page_type+0x236/0x47e > (XEN) [<ffff82d04033d64d>] F mm.c#put_pt_page+0x6f/0x80 > (XEN) [<ffff82d04033d8d6>] F mm.c#put_page_from_l2e+0x8a/0xcf > (XEN) [<ffff82d04033cc27>] F devalidate_page+0x3a3/0x8dc > (XEN) [<ffff82d04033d396>] F mm.c#_put_page_type+0x236/0x47e > (XEN) [<ffff82d04033d64d>] F mm.c#put_pt_page+0x6f/0x80 > (XEN) [<ffff82d04033d807>] F mm.c#put_page_from_l3e+0x8a/0xcf > (XEN) [<ffff82d04033cdf0>] F devalidate_page+0x56c/0x8dc > (XEN) [<ffff82d04033d396>] F mm.c#_put_page_type+0x236/0x47e > (XEN) [<ffff82d04033d64d>] F mm.c#put_pt_page+0x6f/0x80 > (XEN) [<ffff82d04033d6c7>] F mm.c#put_page_from_l4e+0x69/0x6d > (XEN) [<ffff82d04033cf24>] F devalidate_page+0x6a0/0x8dc > (XEN) [<ffff82d04033d396>] F mm.c#_put_page_type+0x236/0x47e > (XEN) [<ffff82d04033d92e>] F put_page_type_preemptible+0x13/0x15 > (XEN) [<ffff82d04032598a>] F domain.c#relinquish_memory+0x1ff/0x4e9 > (XEN) [<ffff82d0403295f2>] F domain_relinquish_resources+0x2b6/0x36a > (XEN) 
[<ffff82d040205cdf>] F domain_kill+0xb8/0x141 > (XEN) [<ffff82d040236cac>] F do_domctl+0xb6f/0x18e5 > (XEN) [<ffff82d04031d098>] F pv_hypercall+0x2f0/0x55f > (XEN) [<ffff82d04039b432>] F lstar_enter+0x112/0x120 > > This will result to a use after-free and possibly an host crash or > memory corruption. > > Freeing the page-tables further down in domain_relinquish_resources() > would not work because pages may not be released until later if another > domain hold a reference on them. > > Once all the PCI devices have been de-assigned, it is actually pointless > to access modify the IOMMU page-tables. So we can simply clear the root > page-table address. Above two paragraphs are confusing to me. I don't know what exact change is proposed until looking over the whole patch. Isn't it clearer to just say "We should clear the root page table pointer in iommu_free_pgtables to avoid use-after-free after all pgtables are moved to the free list"? otherwise: Reviewed-by: Kevin Tian <kevin.tian@intel.com> > > Fixes: 3eef6d07d722 ("x86/iommu: convert VT-d code to use new page table > allocator") > Signed-off-by: Julien Grall <jgrall@amazon.com> > > --- > Changes in v2: > - Introduce clear_root_pgtable() > - Move the patch later in the series > --- > xen/drivers/passthrough/amd/pci_amd_iommu.c | 12 +++++++++++- > xen/drivers/passthrough/vtd/iommu.c | 12 +++++++++++- > xen/drivers/passthrough/x86/iommu.c | 6 ++++++ > xen/include/xen/iommu.h | 1 + > 4 files changed, 29 insertions(+), 2 deletions(-) > > diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c > b/xen/drivers/passthrough/amd/pci_amd_iommu.c > index 42b5a5a9bec4..81add0ba26b4 100644 > --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c > +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c > @@ -381,9 +381,18 @@ static int amd_iommu_assign_device(struct domain > *d, u8 devfn, > return reassign_device(pdev->domain, d, devfn, pdev); > } > > +static void iommu_clear_root_pgtable(struct domain *d) > +{ > + struct domain_iommu 
*hd = dom_iommu(d); > + > + spin_lock(&hd->arch.mapping_lock); > + hd->arch.amd.root_table = NULL; > + spin_unlock(&hd->arch.mapping_lock); > +} > + > static void amd_iommu_domain_destroy(struct domain *d) > { > - dom_iommu(d)->arch.amd.root_table = NULL; > + ASSERT(!dom_iommu(d)->arch.amd.root_table); > } > > static int amd_iommu_add_device(u8 devfn, struct pci_dev *pdev) > @@ -565,6 +574,7 @@ static const struct iommu_ops __initconstrel > _iommu_ops = { > .remove_device = amd_iommu_remove_device, > .assign_device = amd_iommu_assign_device, > .teardown = amd_iommu_domain_destroy, > + .clear_root_pgtable = iommu_clear_root_pgtable, > .map_page = amd_iommu_map_page, > .unmap_page = amd_iommu_unmap_page, > .iotlb_flush = amd_iommu_flush_iotlb_pages, > diff --git a/xen/drivers/passthrough/vtd/iommu.c > b/xen/drivers/passthrough/vtd/iommu.c > index d136fe36883b..e1871f6c2bc1 100644 > --- a/xen/drivers/passthrough/vtd/iommu.c > +++ b/xen/drivers/passthrough/vtd/iommu.c > @@ -1726,6 +1726,15 @@ out: > return ret; > } > > +static void iommu_clear_root_pgtable(struct domain *d) > +{ > + struct domain_iommu *hd = dom_iommu(d); > + > + spin_lock(&hd->arch.mapping_lock); > + hd->arch.vtd.pgd_maddr = 0; > + spin_unlock(&hd->arch.mapping_lock); > +} > + > static void iommu_domain_teardown(struct domain *d) > { > struct domain_iommu *hd = dom_iommu(d); > @@ -1740,7 +1749,7 @@ static void iommu_domain_teardown(struct > domain *d) > xfree(mrmrr); > } > > - hd->arch.vtd.pgd_maddr = 0; > + ASSERT(!hd->arch.vtd.pgd_maddr); > } > > static int __must_check intel_iommu_map_page(struct domain *d, dfn_t dfn, > @@ -2719,6 +2728,7 @@ static struct iommu_ops __initdata vtd_ops = { > .remove_device = intel_iommu_remove_device, > .assign_device = intel_iommu_assign_device, > .teardown = iommu_domain_teardown, > + .clear_root_pgtable = iommu_clear_root_pgtable, > .map_page = intel_iommu_map_page, > .unmap_page = intel_iommu_unmap_page, > .lookup_page = intel_iommu_lookup_page, > diff --git 
a/xen/drivers/passthrough/x86/iommu.c > b/xen/drivers/passthrough/x86/iommu.c > index 82d770107a47..d3cdec6ee83f 100644 > --- a/xen/drivers/passthrough/x86/iommu.c > +++ b/xen/drivers/passthrough/x86/iommu.c > @@ -280,6 +280,12 @@ int iommu_free_pgtables(struct domain *d) > /* After this barrier no new page allocations can occur. */ > spin_barrier(&hd->arch.pgtables.lock); > > + /* > + * Pages will be moved to the free list in a bit. So we want to > + * clear the root page-table to avoid any potential use after-free. > + */ > + hd->platform_ops->clear_root_pgtable(d); > + > while ( (pg = page_list_remove_head(&hd->arch.pgtables.list)) ) > { > free_domheap_page(pg); > diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h > index 863a68fe1622..d59ed7cbad43 100644 > --- a/xen/include/xen/iommu.h > +++ b/xen/include/xen/iommu.h > @@ -272,6 +272,7 @@ struct iommu_ops { > > int (*adjust_irq_affinities)(void); > void (*sync_cache)(const void *addr, unsigned int size); > + void (*clear_root_pgtable)(struct domain *d); > #endif /* CONFIG_X86 */ > > int __must_check (*suspend)(void); > -- > 2.17.1
On 09.02.2021 16:28, Julien Grall wrote: > From: Julien Grall <jgrall@amazon.com> > > The new per-domain IOMMU page-table allocator will now free the > page-tables when domain's resources are relinquished. However, the root > page-table (i.e. hd->arch.pg_maddr) will not be cleared. > > Xen may access the IOMMU page-tables afterwards at least in the case of > PV domain: > > (XEN) Xen call trace: > (XEN) [<ffff82d04025b4b2>] R iommu.c#addr_to_dma_page_maddr+0x12e/0x1d8 > (XEN) [<ffff82d04025b695>] F iommu.c#intel_iommu_unmap_page+0x5d/0xf8 > (XEN) [<ffff82d0402695f3>] F iommu_unmap+0x9c/0x129 > (XEN) [<ffff82d0402696a6>] F iommu_legacy_unmap+0x26/0x63 So if unmap was (also) short-circuited for dying domains, this path wouldn't be taken and all would be well even without this change? Jan
Hi Kevin, On 10/02/2021 02:21, Tian, Kevin wrote: >> From: Julien Grall <julien@xen.org> >> Sent: Tuesday, February 9, 2021 11:28 PM >> >> From: Julien Grall <jgrall@amazon.com> >> >> The new per-domain IOMMU page-table allocator will now free the >> page-tables when domain's resources are relinquished. However, the root >> page-table (i.e. hd->arch.pg_maddr) will not be cleared. > > It's the pointer not the table itself. I don't think the sentence is incorrect. One can view clearing as either clearing the page table itself or the pointer. > >> >> Xen may access the IOMMU page-tables afterwards at least in the case of >> PV domain: >> >> (XEN) Xen call trace: >> (XEN) [<ffff82d04025b4b2>] R >> iommu.c#addr_to_dma_page_maddr+0x12e/0x1d8 >> (XEN) [<ffff82d04025b695>] F >> iommu.c#intel_iommu_unmap_page+0x5d/0xf8 >> (XEN) [<ffff82d0402695f3>] F iommu_unmap+0x9c/0x129 >> (XEN) [<ffff82d0402696a6>] F iommu_legacy_unmap+0x26/0x63 >> (XEN) [<ffff82d04033c5c7>] F mm.c#cleanup_page_mappings+0x139/0x144 >> (XEN) [<ffff82d04033c61d>] F put_page+0x4b/0xb3 >> (XEN) [<ffff82d04033c87f>] F put_page_from_l1e+0x136/0x13b >> (XEN) [<ffff82d04033cada>] F devalidate_page+0x256/0x8dc >> (XEN) [<ffff82d04033d396>] F mm.c#_put_page_type+0x236/0x47e >> (XEN) [<ffff82d04033d64d>] F mm.c#put_pt_page+0x6f/0x80 >> (XEN) [<ffff82d04033d8d6>] F mm.c#put_page_from_l2e+0x8a/0xcf >> (XEN) [<ffff82d04033cc27>] F devalidate_page+0x3a3/0x8dc >> (XEN) [<ffff82d04033d396>] F mm.c#_put_page_type+0x236/0x47e >> (XEN) [<ffff82d04033d64d>] F mm.c#put_pt_page+0x6f/0x80 >> (XEN) [<ffff82d04033d807>] F mm.c#put_page_from_l3e+0x8a/0xcf >> (XEN) [<ffff82d04033cdf0>] F devalidate_page+0x56c/0x8dc >> (XEN) [<ffff82d04033d396>] F mm.c#_put_page_type+0x236/0x47e >> (XEN) [<ffff82d04033d64d>] F mm.c#put_pt_page+0x6f/0x80 >> (XEN) [<ffff82d04033d6c7>] F mm.c#put_page_from_l4e+0x69/0x6d >> (XEN) [<ffff82d04033cf24>] F devalidate_page+0x6a0/0x8dc >> (XEN) [<ffff82d04033d396>] F mm.c#_put_page_type+0x236/0x47e >> (XEN) 
[<ffff82d04033d92e>] F put_page_type_preemptible+0x13/0x15 >> (XEN) [<ffff82d04032598a>] F domain.c#relinquish_memory+0x1ff/0x4e9 >> (XEN) [<ffff82d0403295f2>] F domain_relinquish_resources+0x2b6/0x36a >> (XEN) [<ffff82d040205cdf>] F domain_kill+0xb8/0x141 >> (XEN) [<ffff82d040236cac>] F do_domctl+0xb6f/0x18e5 >> (XEN) [<ffff82d04031d098>] F pv_hypercall+0x2f0/0x55f >> (XEN) [<ffff82d04039b432>] F lstar_enter+0x112/0x120 >> >> This will result to a use after-free and possibly an host crash or >> memory corruption. >> >> Freeing the page-tables further down in domain_relinquish_resources() >> would not work because pages may not be released until later if another >> domain hold a reference on them. >> >> Once all the PCI devices have been de-assigned, it is actually pointless >> to access modify the IOMMU page-tables. So we can simply clear the root >> page-table address. > > Above two paragraphs are confusing to me. I don't know what exact change > is proposed until looking over the whole patch. Isn't it clearer to just say > "We should clear the root page table pointer in iommu_free_pgtables to > avoid use-after-free after all pgtables are moved to the free list"? Your sentence explain the approach taken but not the rational behind the approach. Both are equally important for future reference. I will try to reword it. Cheers,
diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c index 42b5a5a9bec4..81add0ba26b4 100644 --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c @@ -381,9 +381,18 @@ static int amd_iommu_assign_device(struct domain *d, u8 devfn, return reassign_device(pdev->domain, d, devfn, pdev); } +static void iommu_clear_root_pgtable(struct domain *d) +{ + struct domain_iommu *hd = dom_iommu(d); + + spin_lock(&hd->arch.mapping_lock); + hd->arch.amd.root_table = NULL; + spin_unlock(&hd->arch.mapping_lock); +} + static void amd_iommu_domain_destroy(struct domain *d) { - dom_iommu(d)->arch.amd.root_table = NULL; + ASSERT(!dom_iommu(d)->arch.amd.root_table); } static int amd_iommu_add_device(u8 devfn, struct pci_dev *pdev) @@ -565,6 +574,7 @@ static const struct iommu_ops __initconstrel _iommu_ops = { .remove_device = amd_iommu_remove_device, .assign_device = amd_iommu_assign_device, .teardown = amd_iommu_domain_destroy, + .clear_root_pgtable = iommu_clear_root_pgtable, .map_page = amd_iommu_map_page, .unmap_page = amd_iommu_unmap_page, .iotlb_flush = amd_iommu_flush_iotlb_pages, diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c index d136fe36883b..e1871f6c2bc1 100644 --- a/xen/drivers/passthrough/vtd/iommu.c +++ b/xen/drivers/passthrough/vtd/iommu.c @@ -1726,6 +1726,15 @@ out: return ret; } +static void iommu_clear_root_pgtable(struct domain *d) +{ + struct domain_iommu *hd = dom_iommu(d); + + spin_lock(&hd->arch.mapping_lock); + hd->arch.vtd.pgd_maddr = 0; + spin_unlock(&hd->arch.mapping_lock); +} + static void iommu_domain_teardown(struct domain *d) { struct domain_iommu *hd = dom_iommu(d); @@ -1740,7 +1749,7 @@ static void iommu_domain_teardown(struct domain *d) xfree(mrmrr); } - hd->arch.vtd.pgd_maddr = 0; + ASSERT(!hd->arch.vtd.pgd_maddr); } static int __must_check intel_iommu_map_page(struct domain *d, dfn_t dfn, @@ -2719,6 +2728,7 @@ static 
struct iommu_ops __initdata vtd_ops = { .remove_device = intel_iommu_remove_device, .assign_device = intel_iommu_assign_device, .teardown = iommu_domain_teardown, + .clear_root_pgtable = iommu_clear_root_pgtable, .map_page = intel_iommu_map_page, .unmap_page = intel_iommu_unmap_page, .lookup_page = intel_iommu_lookup_page, diff --git a/xen/drivers/passthrough/x86/iommu.c b/xen/drivers/passthrough/x86/iommu.c index 82d770107a47..d3cdec6ee83f 100644 --- a/xen/drivers/passthrough/x86/iommu.c +++ b/xen/drivers/passthrough/x86/iommu.c @@ -280,6 +280,12 @@ int iommu_free_pgtables(struct domain *d) /* After this barrier no new page allocations can occur. */ spin_barrier(&hd->arch.pgtables.lock); + /* + * Pages will be moved to the free list in a bit. So we want to + * clear the root page-table to avoid any potential use after-free. + */ + hd->platform_ops->clear_root_pgtable(d); + while ( (pg = page_list_remove_head(&hd->arch.pgtables.list)) ) { free_domheap_page(pg); diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h index 863a68fe1622..d59ed7cbad43 100644 --- a/xen/include/xen/iommu.h +++ b/xen/include/xen/iommu.h @@ -272,6 +272,7 @@ struct iommu_ops { int (*adjust_irq_affinities)(void); void (*sync_cache)(const void *addr, unsigned int size); + void (*clear_root_pgtable)(struct domain *d); #endif /* CONFIG_X86 */ int __must_check (*suspend)(void);