
[RFC,04/28] mm/migrate: Add migrate_device_vma_range

Message ID 20240828024901.2582335-5-matthew.brost@intel.com (mailing list archive)
State New, archived
Series Introduce GPU SVM and Xe SVM implementation

Commit Message

Matthew Brost Aug. 28, 2024, 2:48 a.m. UTC
Add migrate_device_vma_range(), which prepares a pre-populated array of
device private pages for migration and issues an MMU invalidation.
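
The intended caller-side flow mirrors the existing migrate_device_range()
eviction pattern. A minimal sketch of a driver eviction path follows;
my_evict_chunk() and my_copy_to_sysmem() are hypothetical names and the copy
step is driver specific, so treat this as an illustration rather than part of
the series:

/*
 * Hypothetical driver-side eviction sketch. A real driver would do the
 * device-to-system copy with its own DMA engine.
 */
static int my_evict_chunk(struct mm_struct *mm, void *pgmap_owner,
			  unsigned long *src_pfns, unsigned long npages,
			  unsigned long start)
{
	unsigned long *dst_pfns;
	unsigned long i;
	int err;

	dst_pfns = kvcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL);
	if (!dst_pfns)
		return -ENOMEM;

	/* Pin/lock the device pages and invalidate CPU mappings. */
	err = migrate_device_vma_range(mm, pgmap_owner, src_pfns, npages,
				       start);
	if (err)
		goto out;

	/* Back every migratable page with a freshly allocated system page. */
	for (i = 0; i < npages; i++) {
		struct page *dpage;

		if (!(src_pfns[i] & MIGRATE_PFN_MIGRATE))
			continue;

		dpage = alloc_page(GFP_HIGHUSER);
		if (!dpage)
			continue;

		lock_page(dpage);
		dst_pfns[i] = migrate_pfn(page_to_pfn(dpage));
	}

	/* Copy device memory into the new system pages (driver specific). */
	my_copy_to_sysmem(src_pfns, dst_pfns, npages);

	/* Install the new pages and drop references/locks on the old ones. */
	migrate_device_pages(src_pfns, dst_pfns, npages);
	migrate_device_finalize(src_pfns, dst_pfns, npages);
out:
	kvfree(dst_pfns);
	return err;
}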

Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 include/linux/migrate.h |  3 +++
 mm/migrate_device.c     | 53 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)

Comments

Daniel Vetter Aug. 29, 2024, 9:03 a.m. UTC | #1
On Tue, Aug 27, 2024 at 07:48:37PM -0700, Matthew Brost wrote:
> Add migrate_device_vma_range(), which prepares a pre-populated array of
> device private pages for migration and issues an MMU invalidation.
> 
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>  include/linux/migrate.h |  3 +++
>  mm/migrate_device.c     | 53 +++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 56 insertions(+)
> 
> diff --git a/include/linux/migrate.h b/include/linux/migrate.h
> index 644be30b69c8..e8cce05bf9c2 100644
> --- a/include/linux/migrate.h
> +++ b/include/linux/migrate.h
> @@ -226,6 +226,9 @@ void migrate_vma_pages(struct migrate_vma *migrate);
>  void migrate_vma_finalize(struct migrate_vma *migrate);
>  int migrate_device_range(unsigned long *src_pfns, unsigned long start,
>  			unsigned long npages);
> +int migrate_device_vma_range(struct mm_struct *mm, void *pgmap_owner,
> +			     unsigned long *src_pfns, unsigned long npages,
> +			     unsigned long start);
>  void migrate_device_pages(unsigned long *src_pfns, unsigned long *dst_pfns,
>  			unsigned long npages);
>  void migrate_device_finalize(unsigned long *src_pfns,
> diff --git a/mm/migrate_device.c b/mm/migrate_device.c
> index 6d66dc1c6ffa..e25f12a132e8 100644
> --- a/mm/migrate_device.c
> +++ b/mm/migrate_device.c
> @@ -920,6 +920,59 @@ int migrate_device_range(unsigned long *src_pfns, unsigned long start,
>  }
>  EXPORT_SYMBOL(migrate_device_range);
>  
> +/**
> + * migrate_device_vma_range() - migrate device private pfns to normal memory
> + * and trigger an MMU invalidation.
> + * @mm: struct mm of the device pages.
> + * @pgmap_owner: owner of the device pages' dev_pagemap.
> + * @src_pfns: pre-populated array of source device private pfns to migrate.
> + * @npages: number of pages to migrate.
> + * @start: VMA start address of the device pages.
> + *
> + * Similar to migrate_device_range() but supports a non-contiguous,
> + * pre-populated array of device pages to migrate, and also triggers an MMU
> + * invalidation. Useful in device memory eviction paths where a lock
> + * protecting the device pages is held but the mmap lock cannot be taken due
> + * to a locking inversion (e.g. DRM drivers). Since the mmap lock is not
> + * required to be held, the MMU invalidation can race with the VMA at @start
> + * being repurposed; worst case, this results in an unnecessary invalidation.
> + */
> +int migrate_device_vma_range(struct mm_struct *mm, void *pgmap_owner,
> +			     unsigned long *src_pfns, unsigned long npages,
> +			     unsigned long start)
> +{
> +	struct mmu_notifier_range range;
> +	unsigned long i;
> +
> +	mmu_notifier_range_init_owner(&range, MMU_NOTIFY_MIGRATE, 0,
> +				      mm, start, start + npages * PAGE_SIZE,
> +				      pgmap_owner);
> +	mmu_notifier_invalidate_range_start(&range);

This isn't needed: try_to_migrate(), called from migrate_device_unmap(),
already issues a notifier if there are actually any PTEs to clear. If you
need this one, you've missed a PTE-clear notification somewhere, or there's
some other bad bug.
-Sima

> +
> +	for (i = 0; i < npages; i++) {
> +		struct page *page = pfn_to_page(src_pfns[i]);
> +
> +		if (!get_page_unless_zero(page)) {
> +			src_pfns[i] = 0;
> +			continue;
> +		}
> +
> +		if (!trylock_page(page)) {
> +			src_pfns[i] = 0;
> +			put_page(page);
> +			continue;
> +		}
> +
> +		src_pfns[i] = migrate_pfn(src_pfns[i]) | MIGRATE_PFN_MIGRATE;
> +	}
> +
> +	migrate_device_unmap(src_pfns, npages, NULL);
> +	mmu_notifier_invalidate_range_end(&range);
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL(migrate_device_vma_range);
> +
>  /*
>   * Migrate a device coherent page back to normal memory. The caller should have
>   * a reference on page which will be copied to the new page if migration is
> -- 
> 2.34.1
>
Matthew Brost Aug. 29, 2024, 3:58 p.m. UTC | #2
On Thu, Aug 29, 2024 at 11:03:29AM +0200, Daniel Vetter wrote:
> On Tue, Aug 27, 2024 at 07:48:37PM -0700, Matthew Brost wrote:
> > Add migrate_device_vma_range(), which prepares a pre-populated array of
> > device private pages for migration and issues an MMU invalidation.
> > 
> > Cc: Andrew Morton <akpm@linux-foundation.org>
> > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > ---
> >  include/linux/migrate.h |  3 +++
> >  mm/migrate_device.c     | 53 +++++++++++++++++++++++++++++++++++++++++
> >  2 files changed, 56 insertions(+)
> > 
> > diff --git a/include/linux/migrate.h b/include/linux/migrate.h
> > index 644be30b69c8..e8cce05bf9c2 100644
> > --- a/include/linux/migrate.h
> > +++ b/include/linux/migrate.h
> > @@ -226,6 +226,9 @@ void migrate_vma_pages(struct migrate_vma *migrate);
> >  void migrate_vma_finalize(struct migrate_vma *migrate);
> >  int migrate_device_range(unsigned long *src_pfns, unsigned long start,
> >  			unsigned long npages);
> > +int migrate_device_vma_range(struct mm_struct *mm, void *pgmap_owner,
> > +			     unsigned long *src_pfns, unsigned long npages,
> > +			     unsigned long start);
> >  void migrate_device_pages(unsigned long *src_pfns, unsigned long *dst_pfns,
> >  			unsigned long npages);
> >  void migrate_device_finalize(unsigned long *src_pfns,
> > diff --git a/mm/migrate_device.c b/mm/migrate_device.c
> > index 6d66dc1c6ffa..e25f12a132e8 100644
> > --- a/mm/migrate_device.c
> > +++ b/mm/migrate_device.c
> > @@ -920,6 +920,59 @@ int migrate_device_range(unsigned long *src_pfns, unsigned long start,
> >  }
> >  EXPORT_SYMBOL(migrate_device_range);
> >  
> > +/**
> > + * migrate_device_vma_range() - migrate device private pfns to normal memory
> > + * and trigger an MMU invalidation.
> > + * @mm: struct mm of the device pages.
> > + * @pgmap_owner: owner of the device pages' dev_pagemap.
> > + * @src_pfns: pre-populated array of source device private pfns to migrate.
> > + * @npages: number of pages to migrate.
> > + * @start: VMA start address of the device pages.
> > + *
> > + * Similar to migrate_device_range() but supports a non-contiguous,
> > + * pre-populated array of device pages to migrate, and also triggers an MMU
> > + * invalidation. Useful in device memory eviction paths where a lock
> > + * protecting the device pages is held but the mmap lock cannot be taken due
> > + * to a locking inversion (e.g. DRM drivers). Since the mmap lock is not
> > + * required to be held, the MMU invalidation can race with the VMA at @start
> > + * being repurposed; worst case, this results in an unnecessary invalidation.
> > + */
> > +int migrate_device_vma_range(struct mm_struct *mm, void *pgmap_owner,
> > +			     unsigned long *src_pfns, unsigned long npages,
> > +			     unsigned long start)
> > +{
> > +	struct mmu_notifier_range range;
> > +	unsigned long i;
> > +
> > +	mmu_notifier_range_init_owner(&range, MMU_NOTIFY_MIGRATE, 0,
> > +				      mm, start, start + npages * PAGE_SIZE,
> > +				      pgmap_owner);
> > +	mmu_notifier_invalidate_range_start(&range);
> 
> This isn't needed: try_to_migrate(), called from migrate_device_unmap(),
> already issues a notifier if there are actually any PTEs to clear. If you
> need this one, you've missed a PTE-clear notification somewhere, or there's
> some other bad bug.

Thanks for the tip; let me pull this out and confirm that we get a
notifier from try_to_migrate() when this function is called. Agreed, if
we do get a notifier, this is not needed.
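
For reference, with the explicit notifier calls pulled out, the function
would reduce to roughly the sketch below (pending that confirmation; mm,
pgmap_owner and start would then be unused, so the signature would likely
shrink as well):

int migrate_device_vma_range(struct mm_struct *mm, void *pgmap_owner,
			     unsigned long *src_pfns, unsigned long npages,
			     unsigned long start)
{
	unsigned long i;

	for (i = 0; i < npages; i++) {
		struct page *page = pfn_to_page(src_pfns[i]);

		/* Pin and lock each pre-populated device private page. */
		if (!get_page_unless_zero(page)) {
			src_pfns[i] = 0;
			continue;
		}

		if (!trylock_page(page)) {
			src_pfns[i] = 0;
			put_page(page);
			continue;
		}

		src_pfns[i] = migrate_pfn(src_pfns[i]) | MIGRATE_PFN_MIGRATE;
	}

	/* try_to_migrate() inside here should issue the MMU invalidation. */
	migrate_device_unmap(src_pfns, npages, NULL);

	return 0;
}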

Matt 

> -Sima
> 
> > +
> > +	for (i = 0; i < npages; i++) {
> > +		struct page *page = pfn_to_page(src_pfns[i]);
> > +
> > +		if (!get_page_unless_zero(page)) {
> > +			src_pfns[i] = 0;
> > +			continue;
> > +		}
> > +
> > +		if (!trylock_page(page)) {
> > +			src_pfns[i] = 0;
> > +			put_page(page);
> > +			continue;
> > +		}
> > +
> > +		src_pfns[i] = migrate_pfn(src_pfns[i]) | MIGRATE_PFN_MIGRATE;
> > +	}
> > +
> > +	migrate_device_unmap(src_pfns, npages, NULL);
> > +	mmu_notifier_invalidate_range_end(&range);
> > +
> > +	return 0;
> > +}
> > +EXPORT_SYMBOL(migrate_device_vma_range);
> > +
> >  /*
> >   * Migrate a device coherent page back to normal memory. The caller should have
> >   * a reference on page which will be copied to the new page if migration is
> > -- 
> > 2.34.1
> > 
> 
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch

Patch

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 644be30b69c8..e8cce05bf9c2 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -226,6 +226,9 @@  void migrate_vma_pages(struct migrate_vma *migrate);
 void migrate_vma_finalize(struct migrate_vma *migrate);
 int migrate_device_range(unsigned long *src_pfns, unsigned long start,
 			unsigned long npages);
+int migrate_device_vma_range(struct mm_struct *mm, void *pgmap_owner,
+			     unsigned long *src_pfns, unsigned long npages,
+			     unsigned long start);
 void migrate_device_pages(unsigned long *src_pfns, unsigned long *dst_pfns,
 			unsigned long npages);
 void migrate_device_finalize(unsigned long *src_pfns,
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index 6d66dc1c6ffa..e25f12a132e8 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -920,6 +920,59 @@  int migrate_device_range(unsigned long *src_pfns, unsigned long start,
 }
 EXPORT_SYMBOL(migrate_device_range);
 
+/**
+ * migrate_device_vma_range() - migrate device private pfns to normal memory
+ * and trigger an MMU invalidation.
+ * @mm: struct mm of the device pages.
+ * @pgmap_owner: owner of the device pages' dev_pagemap.
+ * @src_pfns: pre-populated array of source device private pfns to migrate.
+ * @npages: number of pages to migrate.
+ * @start: VMA start address of the device pages.
+ *
+ * Similar to migrate_device_range() but supports a non-contiguous,
+ * pre-populated array of device pages to migrate, and also triggers an MMU
+ * invalidation. Useful in device memory eviction paths where a lock
+ * protecting the device pages is held but the mmap lock cannot be taken due
+ * to a locking inversion (e.g. DRM drivers). Since the mmap lock is not
+ * required to be held, the MMU invalidation can race with the VMA at @start
+ * being repurposed; worst case, this results in an unnecessary invalidation.
+ */
+int migrate_device_vma_range(struct mm_struct *mm, void *pgmap_owner,
+			     unsigned long *src_pfns, unsigned long npages,
+			     unsigned long start)
+{
+	struct mmu_notifier_range range;
+	unsigned long i;
+
+	mmu_notifier_range_init_owner(&range, MMU_NOTIFY_MIGRATE, 0,
+				      mm, start, start + npages * PAGE_SIZE,
+				      pgmap_owner);
+	mmu_notifier_invalidate_range_start(&range);
+
+	for (i = 0; i < npages; i++) {
+		struct page *page = pfn_to_page(src_pfns[i]);
+
+		if (!get_page_unless_zero(page)) {
+			src_pfns[i] = 0;
+			continue;
+		}
+
+		if (!trylock_page(page)) {
+			src_pfns[i] = 0;
+			put_page(page);
+			continue;
+		}
+
+		src_pfns[i] = migrate_pfn(src_pfns[i]) | MIGRATE_PFN_MIGRATE;
+	}
+
+	migrate_device_unmap(src_pfns, npages, NULL);
+	mmu_notifier_invalidate_range_end(&range);
+
+	return 0;
+}
+EXPORT_SYMBOL(migrate_device_vma_range);
+
 /*
  * Migrate a device coherent page back to normal memory. The caller should have
  * a reference on page which will be copied to the new page if migration is