From patchwork Thu May 14 02:32:11 2009 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yu Zhao X-Patchwork-Id: 23681 Received: from vger.kernel.org (vger.kernel.org [209.132.176.167]) by demeter.kernel.org (8.14.2/8.14.2) with ESMTP id n4E2WAQK006636 for ; Thu, 14 May 2009 02:32:11 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1762600AbZENCcH (ORCPT ); Wed, 13 May 2009 22:32:07 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1762540AbZENCcG (ORCPT ); Wed, 13 May 2009 22:32:06 -0400 Received: from mga11.intel.com ([192.55.52.93]:50957 "EHLO mga11.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1762200AbZENCbz (ORCPT ); Wed, 13 May 2009 22:31:55 -0400 Received: from fmsmga001.fm.intel.com ([10.253.24.23]) by fmsmga102.fm.intel.com with ESMTP; 13 May 2009 19:26:12 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="4.41,192,1241420400"; d="scan'208";a="690256956" Received: from yzhao-otc.sh.intel.com ([10.239.48.165]) by fmsmga001.fm.intel.com with ESMTP; 13 May 2009 19:35:10 -0700 From: Yu Zhao To: dwmw2@infradead.org, jbarnes@virtuousgeek.org Cc: linux-pci@vger.kernel.org, iommu@lists.linux-foundation.org, kvm@vger.kernel.org, Yu Zhao Subject: [PATCH v4 resend 6/6] VT-d: support the device IOTLB Date: Thu, 14 May 2009 10:32:11 +0800 Message-Id: <1242268331-1401-7-git-send-email-yu.zhao@intel.com> X-Mailer: git-send-email 1.6.1 In-Reply-To: <1242268331-1401-1-git-send-email-yu.zhao@intel.com> References: <1242268331-1401-1-git-send-email-yu.zhao@intel.com> Sender: linux-pci-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-pci@vger.kernel.org Enable the device IOTLB (i.e. PCIe Address Translation Services, ATS) for both bare-metal and KVM environments.
Signed-off-by: Yu Zhao --- drivers/pci/intel-iommu.c | 100 +++++++++++++++++++++++++++++++++++++++++- include/linux/intel-iommu.h | 1 + 2 files changed, 98 insertions(+), 3 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index a2cbc01..661a02b 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -128,6 +128,7 @@ static inline void context_set_fault_enable(struct context_entry *context) } #define CONTEXT_TT_MULTI_LEVEL 0 +#define CONTEXT_TT_DEV_IOTLB 1 static inline void context_set_translation_type(struct context_entry *context, unsigned long value) @@ -251,6 +252,7 @@ struct device_domain_info { int segment; /* PCI domain */ u8 bus; /* PCI bus number */ u8 devfn; /* PCI devfn number */ + struct intel_iommu *iommu; /* IOMMU used by this device */ struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ struct dmar_domain *domain; /* pointer to domain */ }; @@ -965,6 +967,81 @@ static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, return 0; } +static struct device_domain_info * +iommu_support_dev_iotlb(struct dmar_domain *domain, + int segment, u8 bus, u8 devfn) +{ + int found = 0; + unsigned long flags; + struct device_domain_info *info; + struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn); + + if (!ecap_dev_iotlb_support(iommu->ecap)) + return NULL; + + if (!iommu->qi) + return NULL; + + spin_lock_irqsave(&device_domain_lock, flags); + list_for_each_entry(info, &domain->devices, link) + if (info->bus == bus && info->devfn == devfn) { + found = 1; + break; + } + spin_unlock_irqrestore(&device_domain_lock, flags); + + if (!found || !info->dev) + return NULL; + + if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS)) + return NULL; + + if (!dmar_find_matched_atsr_unit(info->dev)) + return NULL; + + info->iommu = iommu; + + return info; +} + +static void iommu_enable_dev_iotlb(struct device_domain_info *info) +{ + if (!info) + return; + + pci_enable_ats(info->dev, VTD_PAGE_SHIFT); 
+} + +static void iommu_disable_dev_iotlb(struct device_domain_info *info) +{ + if (!info->dev || !pci_ats_enabled(info->dev)) + return; + + pci_disable_ats(info->dev); +} + +static void iommu_flush_dev_iotlb(struct dmar_domain *domain, + u64 addr, unsigned mask) +{ + int rc; + u16 sid, qdep; + unsigned long flags; + struct device_domain_info *info; + + spin_lock_irqsave(&device_domain_lock, flags); + list_for_each_entry(info, &domain->devices, link) { + if (!info->dev || !pci_ats_enabled(info->dev)) + continue; + + sid = info->bus << 8 | info->devfn; + qdep = pci_ats_queue_depth(info->dev); + rc = qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask); + if (rc) + dev_err(&info->dev->dev, "flush IOTLB failed\n"); + } + spin_unlock_irqrestore(&device_domain_lock, flags); +} + static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, u64 addr, unsigned int pages, int non_present_entry_flush) { @@ -988,6 +1065,9 @@ static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, rc = iommu->flush.flush_iotlb(iommu, did, addr, mask, DMA_TLB_PSI_FLUSH, non_present_entry_flush); + if (!rc && !non_present_entry_flush) + iommu_flush_dev_iotlb(iommu->domains[did], addr, mask); + return rc; } @@ -1329,6 +1409,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, unsigned long ndomains; int id; int agaw; + struct device_domain_info *info; pr_debug("Set context mapping for %02x:%02x.%d\n", bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); @@ -1394,7 +1475,9 @@ static int domain_context_mapping_one(struct dmar_domain *domain, context_set_domain_id(context, id); context_set_address_width(context, iommu->agaw); context_set_address_root(context, virt_to_phys(pgd)); - context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL); + info = iommu_support_dev_iotlb(domain, segment, bus, devfn); + context_set_translation_type(context, + info ? 
CONTEXT_TT_DEV_IOTLB : CONTEXT_TT_MULTI_LEVEL); context_set_fault_enable(context); context_set_present(context); domain_flush_cache(domain, context, sizeof(*context)); @@ -1407,6 +1490,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain, else iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0); + iommu_enable_dev_iotlb(info); + spin_unlock_irqrestore(&iommu->lock, flags); spin_lock_irqsave(&domain->iommu_lock, flags); @@ -1554,6 +1639,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain) info->dev->dev.archdata.iommu = NULL; spin_unlock_irqrestore(&device_domain_lock, flags); + iommu_disable_dev_iotlb(info); iommu = device_to_iommu(info->segment, info->bus, info->devfn); iommu_detach_dev(iommu, info->bus, info->devfn); free_devinfo_mem(info); @@ -2217,8 +2303,14 @@ static void flush_unmaps(void) iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH, 0); for (j = 0; j < deferred_flush[i].next; j++) { - __free_iova(&deferred_flush[i].domain[j]->iovad, - deferred_flush[i].iova[j]); + unsigned long mask; + struct iova *iova = deferred_flush[i].iova[j]; + + mask = (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT; + mask = ilog2(mask >> VTD_PAGE_SHIFT); + iommu_flush_dev_iotlb(deferred_flush[i].domain[j], + iova->pfn_lo << PAGE_SHIFT, mask); + __free_iova(&deferred_flush[i].domain[j]->iovad, iova); } deferred_flush[i].next = 0; } @@ -2890,6 +2982,7 @@ static void vm_domain_remove_one_dev_info(struct dmar_domain *domain, info->dev->dev.archdata.iommu = NULL; spin_unlock_irqrestore(&device_domain_lock, flags); + iommu_disable_dev_iotlb(info); iommu_detach_dev(iommu, info->bus, info->devfn); iommu_detach_dependent_devices(iommu, pdev); free_devinfo_mem(info); @@ -2940,6 +3033,7 @@ static void vm_domain_remove_all_dev_info(struct dmar_domain *domain) spin_unlock_irqrestore(&device_domain_lock, flags1); + iommu_disable_dev_iotlb(info); iommu = device_to_iommu(info->segment, info->bus, info->devfn); iommu_detach_dev(iommu, 
info->bus, info->devfn); iommu_detach_dependent_devices(iommu, info->dev); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 540e10b..776e508 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -123,6 +123,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define ecap_qis(e) ((e) & 0x2) #define ecap_eim_support(e) ((e >> 4) & 0x1) #define ecap_ir_support(e) ((e >> 3) & 0x1) +#define ecap_dev_iotlb_support(e) (((e) >> 2) & 0x1) #define ecap_max_handle_mask(e) ((e >> 20) & 0xf) #define ecap_sc_support(e) ((e >> 7) & 0x1) /* Snooping Control */