| Message ID | 1450859136-98482-2-git-send-email-quan.xu@intel.com (mailing list archive) |
|---|---|
| State | New, archived |
| Headers | show |
> From: Xu, Quan > Sent: Wednesday, December 23, 2015 4:26 PM > > This patch checks all kinds of error and all the way up > the call trees of VT-d Device-TLB flush. > > Signed-off-by: Quan Xu <quan.xu@intel.com> > --- > xen/arch/x86/acpi/power.c | 8 +- > xen/arch/x86/crash.c | 3 +- > xen/arch/x86/domain_build.c | 5 +- > xen/arch/x86/mm.c | 15 ++- > xen/arch/x86/mm/p2m-ept.c | 14 ++- > xen/arch/x86/mm/p2m-pt.c | 14 ++- > xen/arch/x86/mm/p2m.c | 19 +++- > xen/arch/x86/x86_64/mm.c | 7 +- > xen/common/domain.c | 3 +- > xen/common/grant_table.c | 5 +- > xen/common/memory.c | 13 ++- > xen/drivers/passthrough/amd/iommu_init.c | 4 +- > xen/drivers/passthrough/amd/pci_amd_iommu.c | 4 +- > xen/drivers/passthrough/arm/smmu.c | 13 ++- > xen/drivers/passthrough/iommu.c | 47 +++++--- > xen/drivers/passthrough/vtd/extern.h | 4 +- > xen/drivers/passthrough/vtd/iommu.c | 157 > ++++++++++++++++++++------ > xen/drivers/passthrough/vtd/qinval.c | 2 +- > xen/drivers/passthrough/vtd/quirks.c | 26 +++-- > xen/drivers/passthrough/vtd/x86/vtd.c | 13 ++- > xen/drivers/passthrough/x86/iommu.c | 6 +- > xen/include/asm-x86/hvm/svm/amd-iommu-proto.h | 4 +- > xen/include/asm-x86/iommu.h | 2 +- > xen/include/xen/iommu.h | 20 ++-- > 24 files changed, 300 insertions(+), 108 deletions(-) > > diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c > index f41f0de..1974721 100644 > --- a/xen/arch/x86/acpi/power.c > +++ b/xen/arch/x86/acpi/power.c > @@ -45,6 +45,8 @@ void do_suspend_lowlevel(void); > > static int device_power_down(void) > { > + int rc; > + > console_suspend(); > > time_suspend(); > @@ -53,7 +55,9 @@ static int device_power_down(void) > > ioapic_suspend(); > > - iommu_suspend(); > + rc = iommu_suspend(); > + if ( rc ) > + return rc; > > lapic_suspend(); > Looks error handling is not only a problem in VT-d code. Above actually should check return values of all suspend callbacks. Just checking iommu_suspend is not enough, but it's a good improvement anyway... [...] 
> diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c > index bca6fe7..a11bc2a 100644 > --- a/xen/arch/x86/domain_build.c > +++ b/xen/arch/x86/domain_build.c > @@ -1627,7 +1627,10 @@ int __init construct_dom0( > } > > if ( d->domain_id == hardware_domid ) > - iommu_hwdom_init(d); > + { > + if ( iommu_hwdom_init(d) ) > + printk("Xen warning : IOMMU hardware domain init failed.\n"); > + } if construct_dom0 fails, guess we can panic here? e.g. simply move earlier BUG_ON(rc != 0) after above trunk. In an ideal case we may disable iommu_enabled upon error at this point, to allow moving forward. But that can be improved separately. [...] > diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c > index 202ff76..3c1db05 100644 > --- a/xen/arch/x86/mm.c > +++ b/xen/arch/x86/mm.c > @@ -2443,11 +2443,18 @@ static int __get_page_type(struct page_info *page, unsigned > long type, > if ( d && is_pv_domain(d) && unlikely(need_iommu(d)) ) > { > if ( (x & PGT_type_mask) == PGT_writable_page ) > - iommu_unmap_page(d, mfn_to_gmfn(d, page_to_mfn(page))); > + { > + rc = iommu_unmap_page(d, mfn_to_gmfn(d, page_to_mfn(page))); > + return rc; > + } looks you return absolutely regardless of error check. There are still some useful code after this point... > else if ( type == PGT_writable_page ) > - iommu_map_page(d, mfn_to_gmfn(d, page_to_mfn(page)), > - page_to_mfn(page), > - IOMMUF_readable|IOMMUF_writable); > + { > + rc = iommu_map_page(d, mfn_to_gmfn(d, page_to_mfn(page)), > + page_to_mfn(page), > + IOMMUF_readable|IOMMUF_writable); > + if ( rc ) > + return rc; > + } this one is correct. [...] 
> diff --git a/xen/arch/x86/mm/p2m-pt.c b/xen/arch/x86/mm/p2m-pt.c > index 709920a..b2b340d 100644 > --- a/xen/arch/x86/mm/p2m-pt.c > +++ b/xen/arch/x86/mm/p2m-pt.c > @@ -675,11 +675,19 @@ p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long > gfn, mfn_t mfn, > } curious why there's no similar check on code below: if ( iommu_use_hap_pt(p2m->domain) ) { if ( iommu_old_flags ) **amd_iommu_flush_pages(p2m->domain, gfn, page_order)**; } > else if ( iommu_pte_flags ) > for ( i = 0; i < (1UL << page_order); i++ ) > - iommu_map_page(p2m->domain, gfn + i, mfn_x(mfn) + i, > - iommu_pte_flags); > + { > + rc = iommu_map_page(p2m->domain, gfn + i, mfn_x(mfn) + i, > + iommu_pte_flags); > + if ( rc ) > + goto out; looks 'break' should be enough here. [...] > diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c > index c6b883d..6b43da0 100644 > --- a/xen/arch/x86/mm/p2m.c > +++ b/xen/arch/x86/mm/p2m.c > @@ -654,7 +659,7 @@ guest_physmap_add_entry(struct domain *d, unsigned long gfn, > p2m_access_t a; > mfn_t omfn; > int pod_count = 0; > - int rc = 0; > + int rc = 0, ret = 0; > > if ( !paging_mode_translate(d) ) > { > @@ -667,7 +672,15 @@ guest_physmap_add_entry(struct domain *d, unsigned long gfn, > if ( rc != 0 ) > { > while ( i-- > 0 ) > - iommu_unmap_page(d, mfn + i); > + { > + ret = iommu_unmap_page(d, mfn + i); > + if ( ret ) > + break; > + } > + > + if ( ret ) > + rc = ret; > + you can reuse 'rc' here. [...] > diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c > index d918002..fe7b10c 100644 > --- a/xen/arch/x86/x86_64/mm.c > +++ b/xen/arch/x86/x86_64/mm.c > @@ -1438,7 +1438,12 @@ int memory_add(unsigned long spfn, unsigned long epfn, > unsigned int pxm) > if ( i != epfn ) > { > while (i-- > old_max) > - iommu_unmap_page(hardware_domain, i); > + { > + ret = iommu_unmap_page(hardware_domain, i); > + if ( ret ) > + break; > + } > + here you can do simple check: if (iommu_unmap_page(hardware_domain, i)) break; [...] 
> diff --git a/xen/common/domain.c b/xen/common/domain.c > index 1b9fcfc..11f526d 100644 > --- a/xen/common/domain.c > +++ b/xen/common/domain.c > @@ -228,7 +228,8 @@ static int late_hwdom_init(struct domain *d) > > rcu_unlock_domain(dom0); > > - iommu_hwdom_init(d); > + if ( iommu_hwdom_init(d) ) > + printk("Xen warning : IOMMU hardware domain init failed.\n"); > > return rv; > #else rv = iommu_hwdom_init(d), otherwise error is not propagated outside. > diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c > index 2b449d5..5faa61e 100644 > --- a/xen/common/grant_table.c > +++ b/xen/common/grant_table.c > @@ -920,7 +920,10 @@ __gnttab_map_grant_ref( > nr_gets++; > (void)get_page(pg, rd); > if ( !(op->flags & GNTMAP_readonly) ) > - get_page_type(pg, PGT_writable_page); > + { > + if ( get_page_type(pg, PGT_writable_page) ) > + goto could_not_pin; > + } combine two ifs together. > diff --git a/xen/common/memory.c b/xen/common/memory.c > index b541f4a1..989b461 100644 > --- a/xen/common/memory.c > +++ b/xen/common/memory.c > @@ -593,6 +593,10 @@ static int xenmem_add_to_physmap(struct domain *d, > unsigned int done = 0; > long rc = 0; > > +#ifdef HAS_PASSTHROUGH > + int ret = 0; > +#endif > + I think you can reuse rc here. > if ( xatp->space != XENMAPSPACE_gmfn_range ) > return xenmem_add_to_physmap_one(d, xatp->space, DOMID_INVALID, > xatp->idx, xatp->gpfn); > @@ -631,8 +635,13 @@ static int xenmem_add_to_physmap(struct domain *d, > if ( need_iommu(d) ) > { > this_cpu(iommu_dont_flush_iotlb) = 0; > - iommu_iotlb_flush(d, xatp->idx - done, done); > - iommu_iotlb_flush(d, xatp->gpfn - done, done); > + ret = iommu_iotlb_flush(d, xatp->idx - done, done); > + if ( ret ) > + return ret; > + > + ret = iommu_iotlb_flush(d, xatp->gpfn - done, done); > + if ( ret ) > + return ret; > } rc = iommu_iotlb_flush(d, xatp->idx - done, done); if ( !rc ) rc = iommu_iotlb_flush(d, xatp->gpfn - done, done); return rc; [...] 
> diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c > index d5137733..34e4ef9 100644 > --- a/xen/drivers/passthrough/iommu.c > +++ b/xen/drivers/passthrough/iommu.c > @@ -146,14 +146,15 @@ static void __hwdom_init check_hwdom_reqs(struct domain *d) > iommu_dom0_strict = 1; > } > > -void __hwdom_init iommu_hwdom_init(struct domain *d) > +int __hwdom_init iommu_hwdom_init(struct domain *d) > { > struct hvm_iommu *hd = domain_hvm_iommu(d); > + int rc = 0; > > check_hwdom_reqs(d); > > if ( !iommu_enabled ) > - return; > + return -EINVAL; iommu_enabled can be false if user chooses so. You should return ZERO here to indicate success. [...] > @@ -354,11 +358,19 @@ int iommu_do_domctl( > return ret; > } > > -void iommu_suspend() > +int iommu_suspend() > { > const struct iommu_ops *ops = iommu_get_ops(); > + int rc; > + > if ( iommu_enabled ) > - ops->suspend(); > + { > + rc = ops->suspend(); > + if ( rc ) > + return rc; > + } > + > + return 0; if ( iommu_enabled ) return ops->suspend(); return 0; > @@ -369,12 +381,21 @@ void iommu_share_p2m_table(struct domain* d) > ops->share_p2m(d); > } > > -void iommu_crash_shutdown(void) > +int iommu_crash_shutdown(void) > { > const struct iommu_ops *ops = iommu_get_ops(); > + int rc; > + > if ( iommu_enabled ) > - ops->crash_shutdown(); > + { > + rc = ops->crash_shutdown(); > + if ( rc ) > + return rc; > + } > + ditto. [...] 
> diff --git a/xen/drivers/passthrough/vtd/iommu.c > b/xen/drivers/passthrough/vtd/iommu.c > index dd13865..08aaaec 100644 > --- a/xen/drivers/passthrough/vtd/iommu.c > +++ b/xen/drivers/passthrough/vtd/iommu.c > @@ -566,6 +571,7 @@ static void __intel_iommu_iotlb_flush(struct domain *d, unsigned > long gfn, > struct iommu *iommu; > int flush_dev_iotlb; > int iommu_domid; > + int rc; > > /* > * No need pcideves_lock here because we have flush > @@ -585,36 +591,47 @@ static void __intel_iommu_iotlb_flush(struct domain *d, > unsigned long gfn, > > if ( page_count > 1 || gfn == -1 ) > { > - if ( iommu_flush_iotlb_dsi(iommu, iommu_domid, > - 0, flush_dev_iotlb) ) > + rc = iommu_flush_iotlb_dsi(iommu, iommu_domid, > + 0, flush_dev_iotlb); > + if ( rc ) > + { > iommu_flush_write_buffer(iommu); > + return rc; > + } > } > else > { > - if ( iommu_flush_iotlb_psi(iommu, iommu_domid, > + rc = iommu_flush_iotlb_psi(iommu, iommu_domid, > (paddr_t)gfn << PAGE_SHIFT_4K, 0, > - !dma_old_pte_present, flush_dev_iotlb) ) > + !dma_old_pte_present, flush_dev_iotlb); > + if ( rc ) > + { > iommu_flush_write_buffer(iommu); > + return rc; > + } iommu_flush_write_buffer can be combined to one for above two branches. > /* clear one page's page table */ > -static void dma_pte_clear_one(struct domain *domain, u64 addr) > +static int dma_pte_clear_one(struct domain *domain, u64 addr) > { > struct hvm_iommu *hd = domain_hvm_iommu(domain); > struct dma_pte *page = NULL, *pte = NULL; > u64 pg_maddr; > + int rc; > > spin_lock(&hd->arch.mapping_lock); > /* get last level pte */ > @@ -622,7 +639,7 @@ static void dma_pte_clear_one(struct domain *domain, u64 addr) > if ( pg_maddr == 0 ) > { > spin_unlock(&hd->arch.mapping_lock); > - return; > + return -ENOENT; stay consistent to other places which use -ENOMEM. 
> } > > page = (struct dma_pte *)map_vtd_domain_page(pg_maddr); > @@ -632,7 +649,7 @@ static void dma_pte_clear_one(struct domain *domain, u64 addr) > { > spin_unlock(&hd->arch.mapping_lock); > unmap_vtd_domain_page(page); > - return; > + return -ENOENT; It's a sane case if above code is referred to below: if ( !dma_pte_present(*pte) ) { spin_unlock(&hd->arch.mapping_lock); unmap_vtd_domain_page(page); return; } > } > > dma_clear_pte(*pte); > @@ -640,9 +657,18 @@ static void dma_pte_clear_one(struct domain *domain, u64 > addr) > iommu_flush_cache_entry(pte, sizeof(struct dma_pte)); > > if ( !this_cpu(iommu_dont_flush_iotlb) ) > - __intel_iommu_iotlb_flush(domain, addr >> PAGE_SHIFT_4K, 1, 1); > + { > + rc = __intel_iommu_iotlb_flush(domain, addr >> PAGE_SHIFT_4K, 1, 1); > + if ( rc ) > + { > + unmap_vtd_domain_page(page); > + return rc; > + } > + } no need for immediate check above. you can return rc in the end. > > unmap_vtd_domain_page(page); > + > + return 0; > } > > static void iommu_free_pagetable(u64 pt_maddr, int level) > @@ -1251,20 +1277,24 @@ static int intel_iommu_domain_init(struct domain *d) > return 0; > } > > -static void __hwdom_init intel_iommu_hwdom_init(struct domain *d) > +static int __hwdom_init intel_iommu_hwdom_init(struct domain *d) > { > struct acpi_drhd_unit *drhd; > + int rc; > > if ( !iommu_passthrough && !need_iommu(d) ) > { > /* Set up 1:1 page table for hardware domain. */ > - vtd_set_hwdom_mapping(d); > + rc = vtd_set_hwdom_mapping(d); > + if ( rc ) > + return rc; > } > > setup_hwdom_pci_devices(d, setup_hwdom_device); > setup_hwdom_rmrr(d); > > - iommu_flush_all(); > + if ( iommu_flush_all() ) > + printk("Xen warning : iommu flush error.\n"); why no error return in this case. 
> > for_each_drhd_unit ( drhd ) > { > @@ -1273,6 +1303,8 @@ static void __hwdom_init intel_iommu_hwdom_init(struct > domain *d) > BUG(); > iommu_enable_translation(drhd); > } > + > + return 0; > } > > int domain_context_mapping_one( > @@ -1404,7 +1436,14 @@ int domain_context_mapping_one( > else > { > int flush_dev_iotlb = find_ats_dev_drhd(iommu) ? 1 : 0; > - iommu_flush_iotlb_dsi(iommu, 0, 1, flush_dev_iotlb); > + int rc; > + > + rc = iommu_flush_iotlb_dsi(iommu, 0, 1, flush_dev_iotlb); > + if ( rc ) > + { > + unmap_vtd_domain_page(context_entries); > + return rc; > + } > } > > set_bit(iommu->index, &hd->arch.iommu_bitmap); > @@ -1412,7 +1451,13 @@ int domain_context_mapping_one( > unmap_vtd_domain_page(context_entries); > > if ( !seg ) > - me_wifi_quirk(domain, bus, devfn, MAP_ME_PHANTOM_FUNC); > + { > + int rc; > + > + rc = me_wifi_quirk(domain, bus, devfn, MAP_ME_PHANTOM_FUNC); > + if ( rc ) > + return rc; > + } if ( !seg ) return me_wifi_quirk(...); > > return 0; > } > @@ -1509,6 +1554,7 @@ int domain_context_unmap_one( > struct context_entry *context, *context_entries; > u64 maddr; > int iommu_domid; > + int rc; > > ASSERT(spin_is_locked(&pcidevs_lock)); > spin_lock(&iommu->lock); > @@ -1543,15 +1589,24 @@ int domain_context_unmap_one( > else > { > int flush_dev_iotlb = find_ats_dev_drhd(iommu) ? 1 : 0; > - iommu_flush_iotlb_dsi(iommu, iommu_domid, 0, flush_dev_iotlb); > + rc = iommu_flush_iotlb_dsi(iommu, iommu_domid, 0, flush_dev_iotlb); > + if ( rc ) > + { > + spin_unlock(&iommu->lock); > + unmap_vtd_domain_page(context_entries); > + return rc; > + } > } just rc = iommu_flush_iotlb_dsi(...) should be enough. see later. 
> > spin_unlock(&iommu->lock); > unmap_vtd_domain_page(context_entries); > > if ( !iommu->intel->drhd->segment ) > - me_wifi_quirk(domain, bus, devfn, UNMAP_ME_PHANTOM_FUNC); > - > + { > + rc = me_wifi_quirk(domain, bus, devfn, UNMAP_ME_PHANTOM_FUNC); > + if ( rc ) > + return rc; > + } > return 0; if ( !rc && !iommu->intel->drhd->segment ) rc = me_wifi_quirk(...); return rc; > } > > @@ -1700,6 +1755,7 @@ static int intel_iommu_map_page( > struct hvm_iommu *hd = domain_hvm_iommu(d); > struct dma_pte *page = NULL, *pte = NULL, old, new = { 0 }; > u64 pg_maddr; > + int rc; > > /* Do nothing if VT-d shares EPT page table */ > if ( iommu_use_hap_pt(d) ) > @@ -1742,30 +1798,39 @@ static int intel_iommu_map_page( > unmap_vtd_domain_page(page); > > if ( !this_cpu(iommu_dont_flush_iotlb) ) > - __intel_iommu_iotlb_flush(d, gfn, dma_pte_present(old), 1); > + { > + rc = __intel_iommu_iotlb_flush(d, gfn, dma_pte_present(old), 1); > + if ( rc ) > + return rc; > + } if ( !this_cpu(iommu_dont_flush_iotlb) ) return __intel_iommu_iotlb_flush(...); I'll stop comment for similar refinement. Please check to improve in next version. :-) > > return 0; > } > [...] 
> > -void iommu_pte_flush(struct domain *d, u64 gfn, u64 *pte, > - int order, int present) > +int iommu_pte_flush(struct domain *d, u64 gfn, u64 *pte, > + int order, int present) > { > struct acpi_drhd_unit *drhd; > struct iommu *iommu = NULL; > struct hvm_iommu *hd = domain_hvm_iommu(d); > int flush_dev_iotlb; > int iommu_domid; > + int rc; > > iommu_flush_cache_entry(pte, sizeof(struct dma_pte)); > > @@ -1779,11 +1844,17 @@ void iommu_pte_flush(struct domain *d, u64 gfn, u64 *pte, > iommu_domid= domain_iommu_domid(d, iommu); > if ( iommu_domid == -1 ) > continue; > - if ( iommu_flush_iotlb_psi(iommu, iommu_domid, > + rc = iommu_flush_iotlb_psi(iommu, iommu_domid, > (paddr_t)gfn << PAGE_SHIFT_4K, > - order, !present, flush_dev_iotlb) ) > + order, !present, flush_dev_iotlb); > + if ( rc ) > + { > iommu_flush_write_buffer(iommu); > + return rc; > + } > } just curious. if write_buffer needs be flushed for every iotlb flush error, shouldn't it be better handled within iommu_flush_... instead of duplicating in every caller? > + > + return 0; > } > > static int __init vtd_ept_page_compatible(struct iommu *iommu) [...] > @@ -2372,16 +2447,19 @@ static int intel_iommu_group_id(u16 seg, u8 bus, u8 devfn) > } > > static u32 iommu_state[MAX_IOMMUS][MAX_IOMMU_REGS]; > -static void vtd_suspend(void) > +static int vtd_suspend(void) > { > struct acpi_drhd_unit *drhd; > struct iommu *iommu; > + int rc; > u32 i; > > if ( !iommu_enabled ) > - return; > + return -EINVAL; not an error. 
> > - iommu_flush_all(); > + rc = iommu_flush_all(); > + if ( rc ) > + return rc; > > for_each_drhd_unit ( drhd ) > { > @@ -2410,17 +2488,22 @@ static void vtd_suspend(void) > if ( !iommu_intremap && iommu_qinval ) > disable_qinval(iommu); > } > + > + return 0; > } > > -static void vtd_crash_shutdown(void) > +static int vtd_crash_shutdown(void) > { > struct acpi_drhd_unit *drhd; > struct iommu *iommu; > + int rc; > > if ( !iommu_enabled ) > - return; > + return -EINVAL; ditto > > - iommu_flush_all(); > + rc = iommu_flush_all(); > + if ( rc ) > + return rc; > > for_each_drhd_unit ( drhd ) > { > @@ -2429,6 +2512,8 @@ static void vtd_crash_shutdown(void) > disable_intremap(drhd->iommu); > disable_qinval(drhd->iommu); > } > + > + return 0; > } > > static void vtd_resume(void) > diff --git a/xen/drivers/passthrough/vtd/qinval.c b/xen/drivers/passthrough/vtd/qinval.c > index b81b0bd..946e812 100644 > --- a/xen/drivers/passthrough/vtd/qinval.c > +++ b/xen/drivers/passthrough/vtd/qinval.c > @@ -324,7 +324,7 @@ static int flush_iotlb_qi( > if ( flush_non_present_entry ) > { > if ( !cap_caching_mode(iommu->cap) ) > - return 1; > + return 0; this looks problematic. originally 0/1 is used to indicate whether caller needs to flush cache. Here you return 0 then may break something... Thanks Kevin
On December 25 2015 10:54 AM, <Tian, Kevin> wrote: > > From: Xu, Quan > > Sent: Wednesday, December 23, 2015 4:26 PM > > > > This patch checks all kinds of error and all the way up the call trees > > of VT-d Device-TLB flush. > > > > Signed-off-by: Quan Xu <quan.xu@intel.com> > > --- > > xen/arch/x86/acpi/power.c | 8 +- > > xen/arch/x86/crash.c | 3 +- > > xen/arch/x86/domain_build.c | 5 +- > > xen/arch/x86/mm.c | 15 ++- > > xen/arch/x86/mm/p2m-ept.c | 14 ++- > > xen/arch/x86/mm/p2m-pt.c | 14 ++- > > xen/arch/x86/mm/p2m.c | 19 +++- > > xen/arch/x86/x86_64/mm.c | 7 +- > > xen/common/domain.c | 3 +- > > xen/common/grant_table.c | 5 +- > > xen/common/memory.c | 13 ++- > > xen/drivers/passthrough/amd/iommu_init.c | 4 +- > > xen/drivers/passthrough/amd/pci_amd_iommu.c | 4 +- > > xen/drivers/passthrough/arm/smmu.c | 13 ++- > > xen/drivers/passthrough/iommu.c | 47 +++++--- > > xen/drivers/passthrough/vtd/extern.h | 4 +- > > xen/drivers/passthrough/vtd/iommu.c | 157 > > ++++++++++++++++++++------ > > xen/drivers/passthrough/vtd/qinval.c | 2 +- > > xen/drivers/passthrough/vtd/quirks.c | 26 +++-- > > xen/drivers/passthrough/vtd/x86/vtd.c | 13 ++- > > xen/drivers/passthrough/x86/iommu.c | 6 +- > > xen/include/asm-x86/hvm/svm/amd-iommu-proto.h | 4 +- > > xen/include/asm-x86/iommu.h | 2 +- > > xen/include/xen/iommu.h | 20 ++-- > > 24 files changed, 300 insertions(+), 108 deletions(-) > > Kevin, Thanks for your comments!! It would take much time to review it. I will fix them in next v5. May I discuss with you f2f for some dubious case? Quan
>>> On 25.12.15 at 03:53, <kevin.tian@intel.com> wrote: >> From: Xu, Quan >> Sent: Wednesday, December 23, 2015 4:26 PM >> --- a/xen/arch/x86/acpi/power.c >> +++ b/xen/arch/x86/acpi/power.c >> @@ -45,6 +45,8 @@ void do_suspend_lowlevel(void); >> >> static int device_power_down(void) >> { >> + int rc; >> + >> console_suspend(); >> >> time_suspend(); >> @@ -53,7 +55,9 @@ static int device_power_down(void) >> >> ioapic_suspend(); >> >> - iommu_suspend(); >> + rc = iommu_suspend(); >> + if ( rc ) >> + return rc; >> >> lapic_suspend(); >> > > Looks error handling is not only a problem in VT-d code. Above > actually should check return values of all suspend callbacks. Just > checking iommu_suspend is not enough, but it's a good improvement > anyway... No, it's not - it leaves the system in a non-working state without undoing whatever succeeded already. Jan
>>> On 23.12.15 at 09:25, <quan.xu@intel.com> wrote: > @@ -182,7 +186,7 @@ static int enter_state(u32 state) > error = tboot_s3_resume(); > break; > case ACPI_STATE_S5: > - acpi_enter_sleep_state(ACPI_STATE_S5); > + error = acpi_enter_sleep_state(ACPI_STATE_S5); I can't see how this is related to the purpose of the patch. I don't mind such error checking being added, but not in this huge patch. It would anyway be nice if you could see about splitting this apart, to aid reviewing and - in case it would be needed after committing - bisection. > --- a/xen/arch/x86/mm.c > +++ b/xen/arch/x86/mm.c > @@ -2443,11 +2443,18 @@ static int __get_page_type(struct page_info *page, unsigned long type, > if ( d && is_pv_domain(d) && unlikely(need_iommu(d)) ) > { > if ( (x & PGT_type_mask) == PGT_writable_page ) > - iommu_unmap_page(d, mfn_to_gmfn(d, page_to_mfn(page))); > + { > + rc = iommu_unmap_page(d, mfn_to_gmfn(d, page_to_mfn(page))); > + return rc; > + } > else if ( type == PGT_writable_page ) > - iommu_map_page(d, mfn_to_gmfn(d, page_to_mfn(page)), > - page_to_mfn(page), > - IOMMUF_readable|IOMMUF_writable); > + { > + rc = iommu_map_page(d, mfn_to_gmfn(d, page_to_mfn(page)), > + page_to_mfn(page), > + IOMMUF_readable|IOMMUF_writable); > + if ( rc ) > + return rc; > + } > } > } Again you can't simply return here, or else you leak the type reference, and you indefinitely stall any other CPU waiting for page validation to happen. 
> --- a/xen/arch/x86/mm/p2m-ept.c > +++ b/xen/arch/x86/mm/p2m-ept.c > @@ -829,15 +829,23 @@ out: > need_modify_vtd_table ) > { > if ( iommu_hap_pt_share ) > - iommu_pte_flush(d, gfn, &ept_entry->epte, order, vtd_pte_present); > + rc = iommu_pte_flush(d, gfn, &ept_entry->epte, order, vtd_pte_present); > else > { > if ( iommu_flags ) > for ( i = 0; i < (1 << order); i++ ) > - iommu_map_page(d, gfn + i, mfn_x(mfn) + i, iommu_flags); > + { > + rc = iommu_map_page(d, gfn + i, mfn_x(mfn) + i, iommu_flags); > + if ( rc ) > + break; > + } And the pattern repeats - you can't just exit without undoing what so far was done. > else > for ( i = 0; i < (1 << order); i++ ) > - iommu_unmap_page(d, gfn + i); > + { > + rc = iommu_unmap_page(d, gfn + i); > + if ( rc ) > + break; > + } As a special case, unmapping should perhaps continue despite an error, in an attempt to do best effort cleanup. I'm not going to continue further down, as I suspect I'll find more of the same class of issues. Jan
diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c index f41f0de..1974721 100644 --- a/xen/arch/x86/acpi/power.c +++ b/xen/arch/x86/acpi/power.c @@ -45,6 +45,8 @@ void do_suspend_lowlevel(void); static int device_power_down(void) { + int rc; + console_suspend(); time_suspend(); @@ -53,7 +55,9 @@ static int device_power_down(void) ioapic_suspend(); - iommu_suspend(); + rc = iommu_suspend(); + if ( rc ) + return rc; lapic_suspend(); @@ -182,7 +186,7 @@ static int enter_state(u32 state) error = tboot_s3_resume(); break; case ACPI_STATE_S5: - acpi_enter_sleep_state(ACPI_STATE_S5); + error = acpi_enter_sleep_state(ACPI_STATE_S5); break; default: error = -EINVAL; diff --git a/xen/arch/x86/crash.c b/xen/arch/x86/crash.c index 888a214..59e1af6 100644 --- a/xen/arch/x86/crash.c +++ b/xen/arch/x86/crash.c @@ -170,7 +170,8 @@ static void nmi_shootdown_cpus(void) /* Crash shutdown any IOMMU functionality as the crashdump kernel is not * happy when booting if interrupt/dma remapping is still enabled */ - iommu_crash_shutdown(); + if ( iommu_crash_shutdown() ) + printk("Failed to shut down IOMMU.\n"); __stop_this_cpu(); diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c index bca6fe7..a11bc2a 100644 --- a/xen/arch/x86/domain_build.c +++ b/xen/arch/x86/domain_build.c @@ -1627,7 +1627,10 @@ int __init construct_dom0( } if ( d->domain_id == hardware_domid ) - iommu_hwdom_init(d); + { + if ( iommu_hwdom_init(d) ) + printk("Xen warning : IOMMU hardware domain init failed.\n"); + } return 0; diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index 202ff76..3c1db05 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -2443,11 +2443,18 @@ static int __get_page_type(struct page_info *page, unsigned long type, if ( d && is_pv_domain(d) && unlikely(need_iommu(d)) ) { if ( (x & PGT_type_mask) == PGT_writable_page ) - iommu_unmap_page(d, mfn_to_gmfn(d, page_to_mfn(page))); + { + rc = iommu_unmap_page(d, mfn_to_gmfn(d, page_to_mfn(page))); + return rc; 
+ } else if ( type == PGT_writable_page ) - iommu_map_page(d, mfn_to_gmfn(d, page_to_mfn(page)), - page_to_mfn(page), - IOMMUF_readable|IOMMUF_writable); + { + rc = iommu_map_page(d, mfn_to_gmfn(d, page_to_mfn(page)), + page_to_mfn(page), + IOMMUF_readable|IOMMUF_writable); + if ( rc ) + return rc; + } } } diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c index 9860c6c..2ed43b0 100644 --- a/xen/arch/x86/mm/p2m-ept.c +++ b/xen/arch/x86/mm/p2m-ept.c @@ -829,15 +829,23 @@ out: need_modify_vtd_table ) { if ( iommu_hap_pt_share ) - iommu_pte_flush(d, gfn, &ept_entry->epte, order, vtd_pte_present); + rc = iommu_pte_flush(d, gfn, &ept_entry->epte, order, vtd_pte_present); else { if ( iommu_flags ) for ( i = 0; i < (1 << order); i++ ) - iommu_map_page(d, gfn + i, mfn_x(mfn) + i, iommu_flags); + { + rc = iommu_map_page(d, gfn + i, mfn_x(mfn) + i, iommu_flags); + if ( rc ) + break; + } else for ( i = 0; i < (1 << order); i++ ) - iommu_unmap_page(d, gfn + i); + { + rc = iommu_unmap_page(d, gfn + i); + if ( rc ) + break; + } } } diff --git a/xen/arch/x86/mm/p2m-pt.c b/xen/arch/x86/mm/p2m-pt.c index 709920a..b2b340d 100644 --- a/xen/arch/x86/mm/p2m-pt.c +++ b/xen/arch/x86/mm/p2m-pt.c @@ -675,11 +675,19 @@ p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn, } else if ( iommu_pte_flags ) for ( i = 0; i < (1UL << page_order); i++ ) - iommu_map_page(p2m->domain, gfn + i, mfn_x(mfn) + i, - iommu_pte_flags); + { + rc = iommu_map_page(p2m->domain, gfn + i, mfn_x(mfn) + i, + iommu_pte_flags); + if ( rc ) + goto out; + } else for ( i = 0; i < (1UL << page_order); i++ ) - iommu_unmap_page(p2m->domain, gfn + i); + { + rc = iommu_unmap_page(p2m->domain, gfn + i); + if ( rc ) + goto out; + } } /* diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c index c6b883d..6b43da0 100644 --- a/xen/arch/x86/mm/p2m.c +++ b/xen/arch/x86/mm/p2m.c @@ -605,12 +605,17 @@ p2m_remove_page(struct p2m_domain *p2m, unsigned long gfn, unsigned long mfn, mfn_t mfn_return; 
p2m_type_t t; p2m_access_t a; + int rc; if ( !paging_mode_translate(p2m->domain) ) { if ( need_iommu(p2m->domain) ) for ( i = 0; i < (1 << page_order); i++ ) - iommu_unmap_page(p2m->domain, mfn + i); + { + rc = iommu_unmap_page(p2m->domain, mfn + i); + if ( rc ) + return rc; + } return 0; } @@ -654,7 +659,7 @@ guest_physmap_add_entry(struct domain *d, unsigned long gfn, p2m_access_t a; mfn_t omfn; int pod_count = 0; - int rc = 0; + int rc = 0, ret = 0; if ( !paging_mode_translate(d) ) { @@ -667,7 +672,15 @@ guest_physmap_add_entry(struct domain *d, unsigned long gfn, if ( rc != 0 ) { while ( i-- > 0 ) - iommu_unmap_page(d, mfn + i); + { + ret = iommu_unmap_page(d, mfn + i); + if ( ret ) + break; + } + + if ( ret ) + rc = ret; + return rc; } } diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c index d918002..fe7b10c 100644 --- a/xen/arch/x86/x86_64/mm.c +++ b/xen/arch/x86/x86_64/mm.c @@ -1438,7 +1438,12 @@ int memory_add(unsigned long spfn, unsigned long epfn, unsigned int pxm) if ( i != epfn ) { while (i-- > old_max) - iommu_unmap_page(hardware_domain, i); + { + ret = iommu_unmap_page(hardware_domain, i); + if ( ret ) + break; + } + goto destroy_m2p; } } diff --git a/xen/common/domain.c b/xen/common/domain.c index 1b9fcfc..11f526d 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -228,7 +228,8 @@ static int late_hwdom_init(struct domain *d) rcu_unlock_domain(dom0); - iommu_hwdom_init(d); + if ( iommu_hwdom_init(d) ) + printk("Xen warning : IOMMU hardware domain init failed.\n"); return rv; #else diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c index 2b449d5..5faa61e 100644 --- a/xen/common/grant_table.c +++ b/xen/common/grant_table.c @@ -920,7 +920,10 @@ __gnttab_map_grant_ref( nr_gets++; (void)get_page(pg, rd); if ( !(op->flags & GNTMAP_readonly) ) - get_page_type(pg, PGT_writable_page); + { + if ( get_page_type(pg, PGT_writable_page) ) + goto could_not_pin; + } } } } diff --git a/xen/common/memory.c b/xen/common/memory.c 
index b541f4a1..989b461 100644 --- a/xen/common/memory.c +++ b/xen/common/memory.c @@ -593,6 +593,10 @@ static int xenmem_add_to_physmap(struct domain *d, unsigned int done = 0; long rc = 0; +#ifdef HAS_PASSTHROUGH + int ret = 0; +#endif + if ( xatp->space != XENMAPSPACE_gmfn_range ) return xenmem_add_to_physmap_one(d, xatp->space, DOMID_INVALID, xatp->idx, xatp->gpfn); @@ -631,8 +635,13 @@ static int xenmem_add_to_physmap(struct domain *d, if ( need_iommu(d) ) { this_cpu(iommu_dont_flush_iotlb) = 0; - iommu_iotlb_flush(d, xatp->idx - done, done); - iommu_iotlb_flush(d, xatp->gpfn - done, done); + ret = iommu_iotlb_flush(d, xatp->idx - done, done); + if ( ret ) + return ret; + + ret = iommu_iotlb_flush(d, xatp->gpfn - done, done); + if ( ret ) + return ret; } #endif diff --git a/xen/drivers/passthrough/amd/iommu_init.c b/xen/drivers/passthrough/amd/iommu_init.c index d90a2d2..ec47e22 100644 --- a/xen/drivers/passthrough/amd/iommu_init.c +++ b/xen/drivers/passthrough/amd/iommu_init.c @@ -1340,12 +1340,14 @@ static void invalidate_all_devices(void) iterate_ivrs_mappings(_invalidate_all_devices); } -void amd_iommu_suspend(void) +int amd_iommu_suspend(void) { struct amd_iommu *iommu; for_each_amd_iommu ( iommu ) disable_iommu(iommu); + + return 0; } void amd_iommu_resume(void) diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c index c1c0b6b..449de13 100644 --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c @@ -280,7 +280,7 @@ static int amd_iommu_domain_init(struct domain *d) return 0; } -static void __hwdom_init amd_iommu_hwdom_init(struct domain *d) +static int __hwdom_init amd_iommu_hwdom_init(struct domain *d) { unsigned long i; const struct amd_iommu *iommu; @@ -312,6 +312,8 @@ static void __hwdom_init amd_iommu_hwdom_init(struct domain *d) BUG(); setup_hwdom_pci_devices(d, amd_iommu_setup_hwdom_device); + + return 0; } void amd_iommu_disable_domain_device(struct 
domain *domain, diff --git a/xen/drivers/passthrough/arm/smmu.c b/xen/drivers/passthrough/arm/smmu.c index bb08827..155b7f3 100644 --- a/xen/drivers/passthrough/arm/smmu.c +++ b/xen/drivers/passthrough/arm/smmu.c @@ -2544,7 +2544,7 @@ static int force_stage = 2; */ static u32 platform_features = ARM_SMMU_FEAT_COHERENT_WALK; -static void arm_smmu_iotlb_flush_all(struct domain *d) +static int arm_smmu_iotlb_flush_all(struct domain *d) { struct arm_smmu_xen_domain *smmu_domain = domain_hvm_iommu(d)->arch.priv; struct iommu_domain *cfg; @@ -2561,13 +2561,15 @@ static void arm_smmu_iotlb_flush_all(struct domain *d) arm_smmu_tlb_inv_context(cfg->priv); } spin_unlock(&smmu_domain->lock); + + return 0; } -static void arm_smmu_iotlb_flush(struct domain *d, unsigned long gfn, - unsigned int page_count) +static int arm_smmu_iotlb_flush(struct domain *d, unsigned long gfn, + unsigned int page_count) { /* ARM SMMU v1 doesn't have flush by VMA and VMID */ - arm_smmu_iotlb_flush_all(d); + return arm_smmu_iotlb_flush_all(d); } static struct iommu_domain *arm_smmu_get_domain(struct domain *d, @@ -2737,8 +2739,9 @@ static int arm_smmu_iommu_domain_init(struct domain *d) return 0; } -static void __hwdom_init arm_smmu_iommu_hwdom_init(struct domain *d) +static int __hwdom_init arm_smmu_iommu_hwdom_init(struct domain *d) { + return 0; } static void arm_smmu_iommu_domain_teardown(struct domain *d) diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c index d5137733..34e4ef9 100644 --- a/xen/drivers/passthrough/iommu.c +++ b/xen/drivers/passthrough/iommu.c @@ -146,14 +146,15 @@ static void __hwdom_init check_hwdom_reqs(struct domain *d) iommu_dom0_strict = 1; } -void __hwdom_init iommu_hwdom_init(struct domain *d) +int __hwdom_init iommu_hwdom_init(struct domain *d) { struct hvm_iommu *hd = domain_hvm_iommu(d); + int rc = 0; check_hwdom_reqs(d); if ( !iommu_enabled ) - return; + return -EINVAL; register_keyhandler('o', &iommu_p2m_table); d->need_iommu = 
!!iommu_dom0_strict; @@ -171,7 +172,10 @@ void __hwdom_init iommu_hwdom_init(struct domain *d) ((page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page) ) mapping |= IOMMUF_writable; - hd->platform_ops->map_page(d, gfn, mfn, mapping); + rc = hd->platform_ops->map_page(d, gfn, mfn, mapping); + if ( rc ) + return rc; + if ( !(i++ & 0xfffff) ) process_pending_softirqs(); } @@ -266,24 +270,24 @@ static void iommu_free_pagetables(unsigned long unused) cpumask_cycle(smp_processor_id(), &cpu_online_map)); } -void iommu_iotlb_flush(struct domain *d, unsigned long gfn, unsigned int page_count) +int iommu_iotlb_flush(struct domain *d, unsigned long gfn, unsigned int page_count) { struct hvm_iommu *hd = domain_hvm_iommu(d); if ( !iommu_enabled || !hd->platform_ops || !hd->platform_ops->iotlb_flush ) - return; + return 0; - hd->platform_ops->iotlb_flush(d, gfn, page_count); + return hd->platform_ops->iotlb_flush(d, gfn, page_count); } -void iommu_iotlb_flush_all(struct domain *d) +int iommu_iotlb_flush_all(struct domain *d) { struct hvm_iommu *hd = domain_hvm_iommu(d); if ( !iommu_enabled || !hd->platform_ops || !hd->platform_ops->iotlb_flush_all ) - return; + return 0; - hd->platform_ops->iotlb_flush_all(d); + return hd->platform_ops->iotlb_flush_all(d); } int __init iommu_setup(void) @@ -354,11 +358,19 @@ int iommu_do_domctl( return ret; } -void iommu_suspend() +int iommu_suspend() { const struct iommu_ops *ops = iommu_get_ops(); + int rc; + if ( iommu_enabled ) - ops->suspend(); + { + rc = ops->suspend(); + if ( rc ) + return rc; + } + + return 0; } void iommu_share_p2m_table(struct domain* d) @@ -369,12 +381,21 @@ void iommu_share_p2m_table(struct domain* d) ops->share_p2m(d); } -void iommu_crash_shutdown(void) +int iommu_crash_shutdown(void) { const struct iommu_ops *ops = iommu_get_ops(); + int rc; + if ( iommu_enabled ) - ops->crash_shutdown(); + { + rc = ops->crash_shutdown(); + if ( rc ) + return rc; + } + iommu_enabled = iommu_intremap = 0; + + return rc; } 
int iommu_get_reserved_device_memory(iommu_grdm_t *func, void *ctxt) diff --git a/xen/drivers/passthrough/vtd/extern.h b/xen/drivers/passthrough/vtd/extern.h index 8acf889..ec9c513 100644 --- a/xen/drivers/passthrough/vtd/extern.h +++ b/xen/drivers/passthrough/vtd/extern.h @@ -91,11 +91,11 @@ int is_igd_vt_enabled_quirk(void); void platform_quirks_init(void); void vtd_ops_preamble_quirk(struct iommu* iommu); void vtd_ops_postamble_quirk(struct iommu* iommu); -void me_wifi_quirk(struct domain *domain, u8 bus, u8 devfn, int map); +int me_wifi_quirk(struct domain *domain, u8 bus, u8 devfn, int map); void pci_vtd_quirk(const struct pci_dev *); int platform_supports_intremap(void); int platform_supports_x2apic(void); -void vtd_set_hwdom_mapping(struct domain *d); +int vtd_set_hwdom_mapping(struct domain *d); #endif // _VTD_EXTERN_H_ diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c index dd13865..08aaaec 100644 --- a/xen/drivers/passthrough/vtd/iommu.c +++ b/xen/drivers/passthrough/vtd/iommu.c @@ -542,11 +542,12 @@ static int iommu_flush_iotlb_psi( return status; } -static void iommu_flush_all(void) +static int iommu_flush_all(void) { struct acpi_drhd_unit *drhd; struct iommu *iommu; int flush_dev_iotlb; + int rc; flush_all_cache(); for_each_drhd_unit ( drhd ) @@ -554,11 +555,15 @@ static void iommu_flush_all(void) iommu = drhd->iommu; iommu_flush_context_global(iommu, 0); flush_dev_iotlb = find_ats_dev_drhd(iommu) ? 
1 : 0; - iommu_flush_iotlb_global(iommu, 0, flush_dev_iotlb); + rc = iommu_flush_iotlb_global(iommu, 0, flush_dev_iotlb); + if ( rc ) + return rc; } + + return 0; } -static void __intel_iommu_iotlb_flush(struct domain *d, unsigned long gfn, +static int __intel_iommu_iotlb_flush(struct domain *d, unsigned long gfn, int dma_old_pte_present, unsigned int page_count) { struct hvm_iommu *hd = domain_hvm_iommu(d); @@ -566,6 +571,7 @@ static void __intel_iommu_iotlb_flush(struct domain *d, unsigned long gfn, struct iommu *iommu; int flush_dev_iotlb; int iommu_domid; + int rc; /* * No need pcideves_lock here because we have flush @@ -585,36 +591,47 @@ static void __intel_iommu_iotlb_flush(struct domain *d, unsigned long gfn, if ( page_count > 1 || gfn == -1 ) { - if ( iommu_flush_iotlb_dsi(iommu, iommu_domid, - 0, flush_dev_iotlb) ) + rc = iommu_flush_iotlb_dsi(iommu, iommu_domid, + 0, flush_dev_iotlb); + if ( rc ) + { iommu_flush_write_buffer(iommu); + return rc; + } } else { - if ( iommu_flush_iotlb_psi(iommu, iommu_domid, + rc = iommu_flush_iotlb_psi(iommu, iommu_domid, (paddr_t)gfn << PAGE_SHIFT_4K, 0, - !dma_old_pte_present, flush_dev_iotlb) ) + !dma_old_pte_present, flush_dev_iotlb); + if ( rc ) + { iommu_flush_write_buffer(iommu); + return rc; + } } } + + return 0; } -static void intel_iommu_iotlb_flush(struct domain *d, unsigned long gfn, unsigned int page_count) +static int intel_iommu_iotlb_flush(struct domain *d, unsigned long gfn, unsigned int page_count) { - __intel_iommu_iotlb_flush(d, gfn, 1, page_count); + return __intel_iommu_iotlb_flush(d, gfn, 1, page_count); } -static void intel_iommu_iotlb_flush_all(struct domain *d) +static int intel_iommu_iotlb_flush_all(struct domain *d) { - __intel_iommu_iotlb_flush(d, 0, 0, 0); + return __intel_iommu_iotlb_flush(d, 0, 0, 0); } /* clear one page's page table */ -static void dma_pte_clear_one(struct domain *domain, u64 addr) +static int dma_pte_clear_one(struct domain *domain, u64 addr) { struct hvm_iommu *hd = 
domain_hvm_iommu(domain); struct dma_pte *page = NULL, *pte = NULL; u64 pg_maddr; + int rc; spin_lock(&hd->arch.mapping_lock); /* get last level pte */ @@ -622,7 +639,7 @@ static void dma_pte_clear_one(struct domain *domain, u64 addr) if ( pg_maddr == 0 ) { spin_unlock(&hd->arch.mapping_lock); - return; + return -ENOENT; } page = (struct dma_pte *)map_vtd_domain_page(pg_maddr); @@ -632,7 +649,7 @@ static void dma_pte_clear_one(struct domain *domain, u64 addr) { spin_unlock(&hd->arch.mapping_lock); unmap_vtd_domain_page(page); - return; + return -ENOENT; } dma_clear_pte(*pte); @@ -640,9 +657,18 @@ static void dma_pte_clear_one(struct domain *domain, u64 addr) iommu_flush_cache_entry(pte, sizeof(struct dma_pte)); if ( !this_cpu(iommu_dont_flush_iotlb) ) - __intel_iommu_iotlb_flush(domain, addr >> PAGE_SHIFT_4K, 1, 1); + { + rc = __intel_iommu_iotlb_flush(domain, addr >> PAGE_SHIFT_4K, 1, 1); + if ( rc ) + { + unmap_vtd_domain_page(page); + return rc; + } + } unmap_vtd_domain_page(page); + + return 0; } static void iommu_free_pagetable(u64 pt_maddr, int level) @@ -1251,20 +1277,24 @@ static int intel_iommu_domain_init(struct domain *d) return 0; } -static void __hwdom_init intel_iommu_hwdom_init(struct domain *d) +static int __hwdom_init intel_iommu_hwdom_init(struct domain *d) { struct acpi_drhd_unit *drhd; + int rc; if ( !iommu_passthrough && !need_iommu(d) ) { /* Set up 1:1 page table for hardware domain. 
*/ - vtd_set_hwdom_mapping(d); + rc = vtd_set_hwdom_mapping(d); + if ( rc ) + return rc; } setup_hwdom_pci_devices(d, setup_hwdom_device); setup_hwdom_rmrr(d); - iommu_flush_all(); + if ( iommu_flush_all() ) + printk("Xen warning : iommu flush error.\n"); for_each_drhd_unit ( drhd ) { @@ -1273,6 +1303,8 @@ static void __hwdom_init intel_iommu_hwdom_init(struct domain *d) BUG(); iommu_enable_translation(drhd); } + + return 0; } int domain_context_mapping_one( @@ -1404,7 +1436,14 @@ int domain_context_mapping_one( else { int flush_dev_iotlb = find_ats_dev_drhd(iommu) ? 1 : 0; - iommu_flush_iotlb_dsi(iommu, 0, 1, flush_dev_iotlb); + int rc; + + rc = iommu_flush_iotlb_dsi(iommu, 0, 1, flush_dev_iotlb); + if ( rc ) + { + unmap_vtd_domain_page(context_entries); + return rc; + } } set_bit(iommu->index, &hd->arch.iommu_bitmap); @@ -1412,7 +1451,13 @@ int domain_context_mapping_one( unmap_vtd_domain_page(context_entries); if ( !seg ) - me_wifi_quirk(domain, bus, devfn, MAP_ME_PHANTOM_FUNC); + { + int rc; + + rc = me_wifi_quirk(domain, bus, devfn, MAP_ME_PHANTOM_FUNC); + if ( rc ) + return rc; + } return 0; } @@ -1509,6 +1554,7 @@ int domain_context_unmap_one( struct context_entry *context, *context_entries; u64 maddr; int iommu_domid; + int rc; ASSERT(spin_is_locked(&pcidevs_lock)); spin_lock(&iommu->lock); @@ -1543,15 +1589,24 @@ int domain_context_unmap_one( else { int flush_dev_iotlb = find_ats_dev_drhd(iommu) ? 
1 : 0; - iommu_flush_iotlb_dsi(iommu, iommu_domid, 0, flush_dev_iotlb); + rc = iommu_flush_iotlb_dsi(iommu, iommu_domid, 0, flush_dev_iotlb); + if ( rc ) + { + spin_unlock(&iommu->lock); + unmap_vtd_domain_page(context_entries); + return rc; + } } spin_unlock(&iommu->lock); unmap_vtd_domain_page(context_entries); if ( !iommu->intel->drhd->segment ) - me_wifi_quirk(domain, bus, devfn, UNMAP_ME_PHANTOM_FUNC); - + { + rc = me_wifi_quirk(domain, bus, devfn, UNMAP_ME_PHANTOM_FUNC); + if ( rc ) + return rc; + } return 0; } @@ -1700,6 +1755,7 @@ static int intel_iommu_map_page( struct hvm_iommu *hd = domain_hvm_iommu(d); struct dma_pte *page = NULL, *pte = NULL, old, new = { 0 }; u64 pg_maddr; + int rc; /* Do nothing if VT-d shares EPT page table */ if ( iommu_use_hap_pt(d) ) @@ -1742,30 +1798,39 @@ static int intel_iommu_map_page( unmap_vtd_domain_page(page); if ( !this_cpu(iommu_dont_flush_iotlb) ) - __intel_iommu_iotlb_flush(d, gfn, dma_pte_present(old), 1); + { + rc = __intel_iommu_iotlb_flush(d, gfn, dma_pte_present(old), 1); + if ( rc ) + return rc; + } return 0; } static int intel_iommu_unmap_page(struct domain *d, unsigned long gfn) { + int rc; + /* Do nothing if hardware domain and iommu supports pass thru. 
*/ if ( iommu_passthrough && is_hardware_domain(d) ) return 0; - dma_pte_clear_one(d, (paddr_t)gfn << PAGE_SHIFT_4K); + rc = dma_pte_clear_one(d, (paddr_t)gfn << PAGE_SHIFT_4K); + if ( rc ) + return rc; return 0; } -void iommu_pte_flush(struct domain *d, u64 gfn, u64 *pte, - int order, int present) +int iommu_pte_flush(struct domain *d, u64 gfn, u64 *pte, + int order, int present) { struct acpi_drhd_unit *drhd; struct iommu *iommu = NULL; struct hvm_iommu *hd = domain_hvm_iommu(d); int flush_dev_iotlb; int iommu_domid; + int rc; iommu_flush_cache_entry(pte, sizeof(struct dma_pte)); @@ -1779,11 +1844,17 @@ void iommu_pte_flush(struct domain *d, u64 gfn, u64 *pte, iommu_domid= domain_iommu_domid(d, iommu); if ( iommu_domid == -1 ) continue; - if ( iommu_flush_iotlb_psi(iommu, iommu_domid, + rc = iommu_flush_iotlb_psi(iommu, iommu_domid, (paddr_t)gfn << PAGE_SHIFT_4K, - order, !present, flush_dev_iotlb) ) + order, !present, flush_dev_iotlb); + if ( rc ) + { iommu_flush_write_buffer(iommu); + return rc; + } } + + return 0; } static int __init vtd_ept_page_compatible(struct iommu *iommu) @@ -2103,7 +2174,11 @@ static int init_vtd_hw(void) return -EIO; } } - iommu_flush_all(); + + ret = iommu_flush_all(); + if ( ret ) + return ret; + return 0; } @@ -2372,16 +2447,19 @@ static int intel_iommu_group_id(u16 seg, u8 bus, u8 devfn) } static u32 iommu_state[MAX_IOMMUS][MAX_IOMMU_REGS]; -static void vtd_suspend(void) +static int vtd_suspend(void) { struct acpi_drhd_unit *drhd; struct iommu *iommu; + int rc; u32 i; if ( !iommu_enabled ) - return; + return -EINVAL; - iommu_flush_all(); + rc = iommu_flush_all(); + if ( rc ) + return rc; for_each_drhd_unit ( drhd ) { @@ -2410,17 +2488,22 @@ static void vtd_suspend(void) if ( !iommu_intremap && iommu_qinval ) disable_qinval(iommu); } + + return 0; } -static void vtd_crash_shutdown(void) +static int vtd_crash_shutdown(void) { struct acpi_drhd_unit *drhd; struct iommu *iommu; + int rc; if ( !iommu_enabled ) - return; + return -EINVAL; 
- iommu_flush_all(); + rc = iommu_flush_all(); + if ( rc ) + return rc; for_each_drhd_unit ( drhd ) { @@ -2429,6 +2512,8 @@ static void vtd_crash_shutdown(void) disable_intremap(drhd->iommu); disable_qinval(drhd->iommu); } + + return 0; } static void vtd_resume(void) diff --git a/xen/drivers/passthrough/vtd/qinval.c b/xen/drivers/passthrough/vtd/qinval.c index b81b0bd..946e812 100644 --- a/xen/drivers/passthrough/vtd/qinval.c +++ b/xen/drivers/passthrough/vtd/qinval.c @@ -324,7 +324,7 @@ static int flush_iotlb_qi( if ( flush_non_present_entry ) { if ( !cap_caching_mode(iommu->cap) ) - return 1; + return 0; else did = 0; } diff --git a/xen/drivers/passthrough/vtd/quirks.c b/xen/drivers/passthrough/vtd/quirks.c index 1888843..d9aea7e 100644 --- a/xen/drivers/passthrough/vtd/quirks.c +++ b/xen/drivers/passthrough/vtd/quirks.c @@ -332,10 +332,11 @@ void __init platform_quirks_init(void) * assigning Intel integrated wifi device to a guest. */ -static void map_me_phantom_function(struct domain *domain, u32 dev, int map) +static int map_me_phantom_function(struct domain *domain, u32 dev, int map) { struct acpi_drhd_unit *drhd; struct pci_dev *pdev; + int rc = 0; /* find ME VT-d engine base on a real ME device */ pdev = pci_get_pdev(0, 0, PCI_DEVFN(dev, 0)); @@ -343,23 +344,26 @@ static void map_me_phantom_function(struct domain *domain, u32 dev, int map) /* map or unmap ME phantom function */ if ( map ) - domain_context_mapping_one(domain, drhd->iommu, 0, - PCI_DEVFN(dev, 7), NULL); + rc = domain_context_mapping_one(domain, drhd->iommu, 0, + PCI_DEVFN(dev, 7), NULL); else - domain_context_unmap_one(domain, drhd->iommu, 0, - PCI_DEVFN(dev, 7)); + rc = domain_context_unmap_one(domain, drhd->iommu, 0, + PCI_DEVFN(dev, 7)); + + return rc; } -void me_wifi_quirk(struct domain *domain, u8 bus, u8 devfn, int map) +int me_wifi_quirk(struct domain *domain, u8 bus, u8 devfn, int map) { u32 id; + int rc = 0; id = pci_conf_read32(0, 0, 0, 0, 0); if ( IS_CTG(id) ) { /* quit if ME does 
not exist */ if ( pci_conf_read32(0, 0, 3, 0, 0) == 0xffffffff ) - return; + return -ENOENT; /* if device is WLAN device, map ME phantom device 0:3.7 */ id = pci_conf_read32(0, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), 0); @@ -373,7 +377,7 @@ void me_wifi_quirk(struct domain *domain, u8 bus, u8 devfn, int map) case 0x423b8086: case 0x423c8086: case 0x423d8086: - map_me_phantom_function(domain, 3, map); + rc = map_me_phantom_function(domain, 3, map); break; default: break; @@ -383,7 +387,7 @@ void me_wifi_quirk(struct domain *domain, u8 bus, u8 devfn, int map) { /* quit if ME does not exist */ if ( pci_conf_read32(0, 0, 22, 0, 0) == 0xffffffff ) - return; + return -ENOENT; /* if device is WLAN device, map ME phantom device 0:22.7 */ id = pci_conf_read32(0, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), 0); @@ -399,12 +403,14 @@ void me_wifi_quirk(struct domain *domain, u8 bus, u8 devfn, int map) case 0x42388086: /* Puma Peak */ case 0x422b8086: case 0x422c8086: - map_me_phantom_function(domain, 22, map); + rc = map_me_phantom_function(domain, 22, map); break; default: break; } } + + return rc; } void pci_vtd_quirk(const struct pci_dev *pdev) diff --git a/xen/drivers/passthrough/vtd/x86/vtd.c b/xen/drivers/passthrough/vtd/x86/vtd.c index c0d6aab..b952ff7 100644 --- a/xen/drivers/passthrough/vtd/x86/vtd.c +++ b/xen/drivers/passthrough/vtd/x86/vtd.c @@ -108,9 +108,10 @@ void hvm_dpci_isairq_eoi(struct domain *d, unsigned int isairq) spin_unlock(&d->event_lock); } -void __hwdom_init vtd_set_hwdom_mapping(struct domain *d) +int __hwdom_init vtd_set_hwdom_mapping(struct domain *d) { unsigned long i, j, tmp, top; + int rc; BUG_ON(!is_hardware_domain(d)); @@ -140,11 +141,17 @@ void __hwdom_init vtd_set_hwdom_mapping(struct domain *d) tmp = 1 << (PAGE_SHIFT - PAGE_SHIFT_4K); for ( j = 0; j < tmp; j++ ) - iommu_map_page(d, pfn * tmp + j, pfn * tmp + j, - IOMMUF_readable|IOMMUF_writable); + { + rc = iommu_map_page(d, pfn * tmp + j, pfn * tmp + j, + IOMMUF_readable|IOMMUF_writable); + if 
( rc ) + return rc; + } if (!(i & (0xfffff >> (PAGE_SHIFT - PAGE_SHIFT_4K)))) process_pending_softirqs(); } + + return 0; } diff --git a/xen/drivers/passthrough/x86/iommu.c b/xen/drivers/passthrough/x86/iommu.c index 8cbb655..6674fb0 100644 --- a/xen/drivers/passthrough/x86/iommu.c +++ b/xen/drivers/passthrough/x86/iommu.c @@ -104,7 +104,11 @@ int arch_iommu_populate_page_table(struct domain *d) this_cpu(iommu_dont_flush_iotlb) = 0; if ( !rc ) - iommu_iotlb_flush_all(d); + { + rc = iommu_iotlb_flush_all(d); + if ( rc ) + return rc; + } else if ( rc != -ERESTART ) iommu_teardown(d); diff --git a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h index 9c51172..4691f9b 100644 --- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h @@ -119,8 +119,8 @@ extern unsigned long *shared_intremap_inuse; /* power management support */ void amd_iommu_resume(void); -void amd_iommu_suspend(void); -void amd_iommu_crash_shutdown(void); +int amd_iommu_suspend(void); +int amd_iommu_crash_shutdown(void); /* guest iommu support */ void amd_iommu_send_guest_cmd(struct amd_iommu *iommu, u32 cmd[]); diff --git a/xen/include/asm-x86/iommu.h b/xen/include/asm-x86/iommu.h index 29203d7..cf2a269 100644 --- a/xen/include/asm-x86/iommu.h +++ b/xen/include/asm-x86/iommu.h @@ -26,7 +26,7 @@ int iommu_setup_hpet_msi(struct msi_desc *); /* While VT-d specific, this must get declared in a generic header. 
*/ int adjust_vtd_irq_affinities(void); -void iommu_pte_flush(struct domain *d, u64 gfn, u64 *pte, int order, int present); +int iommu_pte_flush(struct domain *d, u64 gfn, u64 *pte, int order, int present); int iommu_supports_eim(void); int iommu_enable_x2apic_IR(void); void iommu_disable_x2apic_IR(void); diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h index 8f3a20e..f5b6f7e 100644 --- a/xen/include/xen/iommu.h +++ b/xen/include/xen/iommu.h @@ -55,7 +55,7 @@ int iommu_add_device(struct pci_dev *pdev); int iommu_enable_device(struct pci_dev *pdev); int iommu_remove_device(struct pci_dev *pdev); int iommu_domain_init(struct domain *d); -void iommu_hwdom_init(struct domain *d); +int iommu_hwdom_init(struct domain *d); void iommu_domain_destroy(struct domain *d); int deassign_device(struct domain *d, u16 seg, u8 bus, u8 devfn); @@ -134,7 +134,7 @@ typedef int iommu_grdm_t(xen_pfn_t start, xen_ulong_t nr, u32 id, void *ctxt); struct iommu_ops { int (*init)(struct domain *d); - void (*hwdom_init)(struct domain *d); + int (*hwdom_init)(struct domain *d); int (*add_device)(u8 devfn, device_t *dev); int (*enable_device)(device_t *dev); int (*remove_device)(u8 devfn, device_t *dev); @@ -157,19 +157,19 @@ struct iommu_ops { unsigned int (*read_apic_from_ire)(unsigned int apic, unsigned int reg); int (*setup_hpet_msi)(struct msi_desc *); #endif /* CONFIG_X86 */ - void (*suspend)(void); + int (*suspend)(void); void (*resume)(void); void (*share_p2m)(struct domain *d); - void (*crash_shutdown)(void); - void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count); - void (*iotlb_flush_all)(struct domain *d); + int (*crash_shutdown)(void); + int (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count); + int (*iotlb_flush_all)(struct domain *d); int (*get_reserved_device_memory)(iommu_grdm_t *, void *); void (*dump_p2m_table)(struct domain *d); }; -void iommu_suspend(void); +int iommu_suspend(void); void iommu_resume(void); 
-void iommu_crash_shutdown(void); +int iommu_crash_shutdown(void); int iommu_get_reserved_device_memory(iommu_grdm_t *, void *); void iommu_share_p2m_table(struct domain *d); @@ -182,8 +182,8 @@ int iommu_do_pci_domctl(struct xen_domctl *, struct domain *d, int iommu_do_domctl(struct xen_domctl *, struct domain *d, XEN_GUEST_HANDLE_PARAM(xen_domctl_t)); -void iommu_iotlb_flush(struct domain *d, unsigned long gfn, unsigned int page_count); -void iommu_iotlb_flush_all(struct domain *d); +int iommu_iotlb_flush(struct domain *d, unsigned long gfn, unsigned int page_count); +int iommu_iotlb_flush_all(struct domain *d); /* * The purpose of the iommu_dont_flush_iotlb optional cpu flag is to
This patch checks all kinds of errors and propagates them all the way up the call trees of the VT-d Device-TLB flush. Signed-off-by: Quan Xu <quan.xu@intel.com> --- xen/arch/x86/acpi/power.c | 8 +- xen/arch/x86/crash.c | 3 +- xen/arch/x86/domain_build.c | 5 +- xen/arch/x86/mm.c | 15 ++- xen/arch/x86/mm/p2m-ept.c | 14 ++- xen/arch/x86/mm/p2m-pt.c | 14 ++- xen/arch/x86/mm/p2m.c | 19 +++- xen/arch/x86/x86_64/mm.c | 7 +- xen/common/domain.c | 3 +- xen/common/grant_table.c | 5 +- xen/common/memory.c | 13 ++- xen/drivers/passthrough/amd/iommu_init.c | 4 +- xen/drivers/passthrough/amd/pci_amd_iommu.c | 4 +- xen/drivers/passthrough/arm/smmu.c | 13 ++- xen/drivers/passthrough/iommu.c | 47 +++++--- xen/drivers/passthrough/vtd/extern.h | 4 +- xen/drivers/passthrough/vtd/iommu.c | 157 ++++++++++++++++++++------ xen/drivers/passthrough/vtd/qinval.c | 2 +- xen/drivers/passthrough/vtd/quirks.c | 26 +++-- xen/drivers/passthrough/vtd/x86/vtd.c | 13 ++- xen/drivers/passthrough/x86/iommu.c | 6 +- xen/include/asm-x86/hvm/svm/amd-iommu-proto.h | 4 +- xen/include/asm-x86/iommu.h | 2 +- xen/include/xen/iommu.h | 20 ++-- 24 files changed, 300 insertions(+), 108 deletions(-)