diff mbox series

[7/8] iommu/intel: Support the gfp argument to the map_pages op

Message ID 7-v1-6e8b3997c46d+89e-iommu_map_gfp_jgg@nvidia.com (mailing list archive)
State Superseded
Headers show
Series Let iommufd charge IOPTE allocations to the memory cgroup | expand

Commit Message

Jason Gunthorpe Jan. 6, 2023, 4:42 p.m. UTC
Flow it down to alloc_pgtable_page() via pfn_to_dma_pte() and
__domain_mapping().

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/intel/iommu.c | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

Comments

Tian, Kevin Jan. 17, 2023, 3:38 a.m. UTC | #1
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Saturday, January 7, 2023 12:43 AM
> 
> @@ -2368,7 +2372,7 @@ static int iommu_domain_identity_map(struct
> dmar_domain *domain,
> 
>  	return __domain_mapping(domain, first_vpfn,
>  				first_vpfn, last_vpfn - first_vpfn + 1,
> -				DMA_PTE_READ|DMA_PTE_WRITE);
> +				DMA_PTE_READ|DMA_PTE_WRITE,
> GFP_KERNEL);
>  }

Baolu, can you help confirm whether switching from GFP_ATOMIC to
GFP_KERNEL is OK in this path? it looks fine to me in a quick glance
but want to be conservative here.

> @@ -4333,7 +4337,8 @@ static size_t intel_iommu_unmap(struct
> iommu_domain *domain,
> 
>  	/* Cope with horrid API which requires us to unmap more than the
>  	   size argument if it happens to be a large-page mapping. */
> -	BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT,
> &level));
> +	BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT,
> &level,
> +			       GFP_ATOMIC));

with level==0 it implies it's only lookup w/o pgtable allocation. From this
angle it reads better to use a more relaxed gfp e.g. GFP_KERNEL here.

> @@ -4392,7 +4397,8 @@ static phys_addr_t
> intel_iommu_iova_to_phys(struct iommu_domain *domain,
>  	int level = 0;
>  	u64 phys = 0;
> 
> -	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT,
> &level);
> +	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT,
> &level,
> +			     GFP_ATOMIC);

ditto
Baolu Lu Jan. 17, 2023, 8:35 a.m. UTC | #2
On 2023/1/17 11:38, Tian, Kevin wrote:
>> From: Jason Gunthorpe<jgg@nvidia.com>
>> Sent: Saturday, January 7, 2023 12:43 AM
>>
>> @@ -2368,7 +2372,7 @@ static int iommu_domain_identity_map(struct
>> dmar_domain *domain,
>>
>>   	return __domain_mapping(domain, first_vpfn,
>>   				first_vpfn, last_vpfn - first_vpfn + 1,
>> -				DMA_PTE_READ|DMA_PTE_WRITE);
>> +				DMA_PTE_READ|DMA_PTE_WRITE,
>> GFP_KERNEL);
>>   }
> Baolu, can you help confirm whether switching from GFP_ATOMIC to
> GFP_KERNEL is OK in this path? it looks fine to me in a quick glance
> but want to be conservative here.

This is also good for me. The memory notifier callback runs in a process
context and allowed to block.

Best regards,
baolu
Jason Gunthorpe Jan. 17, 2023, 1:28 p.m. UTC | #3
On Tue, Jan 17, 2023 at 03:38:51AM +0000, Tian, Kevin wrote:
> > From: Jason Gunthorpe <jgg@nvidia.com>
> > Sent: Saturday, January 7, 2023 12:43 AM
> > 
> > @@ -2368,7 +2372,7 @@ static int iommu_domain_identity_map(struct
> > dmar_domain *domain,
> > 
> >  	return __domain_mapping(domain, first_vpfn,
> >  				first_vpfn, last_vpfn - first_vpfn + 1,
> > -				DMA_PTE_READ|DMA_PTE_WRITE);
> > +				DMA_PTE_READ|DMA_PTE_WRITE,
> > GFP_KERNEL);
> >  }
> 
> Baolu, can you help confirm whether switching from GFP_ATOMIC to
> GFP_KERNEL is OK in this path? it looks fine to me in a quick glance
> but want to be conservative here.

I checked it carefully myself as well, good to check again.

> > @@ -4333,7 +4337,8 @@ static size_t intel_iommu_unmap(struct
> > iommu_domain *domain,
> > 
> >  	/* Cope with horrid API which requires us to unmap more than the
> >  	   size argument if it happens to be a large-page mapping. */
> > -	BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT,
> > &level));
> > +	BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT,
> > &level,
> > +			       GFP_ATOMIC));
> 
> with level==0 it implies it's only lookup w/o pgtable allocation. From this
> angle it reads better to use a more relaxed gfp e.g. GFP_KERNEL here.

We should only write GFP_KERNEL if it is actually a sleepable context
because it will be mighty confusing if it isn't. I couldn't tell what
the context is so I left it as ATOMIC.

You are correct this is only just a lookup and so the value is never
used / doesn't matter.

Jason
diff mbox series

Patch

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index e3807776971563..a1a66798e1f06c 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -908,7 +908,8 @@  void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
 #endif
 
 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
-				      unsigned long pfn, int *target_level)
+				      unsigned long pfn, int *target_level,
+				      gfp_t gfp)
 {
 	struct dma_pte *parent, *pte;
 	int level = agaw_to_level(domain->agaw);
@@ -935,7 +936,7 @@  static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
 		if (!dma_pte_present(pte)) {
 			uint64_t pteval;
 
-			tmp_page = alloc_pgtable_page(domain->nid, GFP_ATOMIC);
+			tmp_page = alloc_pgtable_page(domain->nid, gfp);
 
 			if (!tmp_page)
 				return NULL;
@@ -2150,7 +2151,8 @@  static void switch_to_super_page(struct dmar_domain *domain,
 
 	while (start_pfn <= end_pfn) {
 		if (!pte)
-			pte = pfn_to_dma_pte(domain, start_pfn, &level);
+			pte = pfn_to_dma_pte(domain, start_pfn, &level,
+					     GFP_ATOMIC);
 
 		if (dma_pte_present(pte)) {
 			dma_pte_free_pagetable(domain, start_pfn,
@@ -2172,7 +2174,8 @@  static void switch_to_super_page(struct dmar_domain *domain,
 
 static int
 __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
-		 unsigned long phys_pfn, unsigned long nr_pages, int prot)
+		 unsigned long phys_pfn, unsigned long nr_pages, int prot,
+		 gfp_t gfp)
 {
 	struct dma_pte *first_pte = NULL, *pte = NULL;
 	unsigned int largepage_lvl = 0;
@@ -2202,7 +2205,8 @@  __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 			largepage_lvl = hardware_largepage_caps(domain, iov_pfn,
 					phys_pfn, nr_pages);
 
-			pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
+			pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl,
+					     gfp);
 			if (!pte)
 				return -ENOMEM;
 			first_pte = pte;
@@ -2368,7 +2372,7 @@  static int iommu_domain_identity_map(struct dmar_domain *domain,
 
 	return __domain_mapping(domain, first_vpfn,
 				first_vpfn, last_vpfn - first_vpfn + 1,
-				DMA_PTE_READ|DMA_PTE_WRITE);
+				DMA_PTE_READ|DMA_PTE_WRITE, GFP_KERNEL);
 }
 
 static int md_domain_init(struct dmar_domain *domain, int guest_width);
@@ -4298,7 +4302,7 @@  static int intel_iommu_map(struct iommu_domain *domain,
 	   the low bits of hpa would take us onto the next page */
 	size = aligned_nrpages(hpa, size);
 	return __domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
-				hpa >> VTD_PAGE_SHIFT, size, prot);
+				hpa >> VTD_PAGE_SHIFT, size, prot, gfp);
 }
 
 static int intel_iommu_map_pages(struct iommu_domain *domain,
@@ -4333,7 +4337,8 @@  static size_t intel_iommu_unmap(struct iommu_domain *domain,
 
 	/* Cope with horrid API which requires us to unmap more than the
 	   size argument if it happens to be a large-page mapping. */
-	BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
+	BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level,
+			       GFP_ATOMIC));
 
 	if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
 		size = VTD_PAGE_SIZE << level_to_offset_bits(level);
@@ -4392,7 +4397,8 @@  static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
 	int level = 0;
 	u64 phys = 0;
 
-	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
+	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level,
+			     GFP_ATOMIC);
 	if (pte && dma_pte_present(pte))
 		phys = dma_pte_addr(pte) +
 			(iova & (BIT_MASK(level_to_offset_bits(level) +