[RFC] mm/hmm, mm/migrate_device: Allow p2p access and p2p migration

Message ID 20241015111322.97514-1-thomas.hellstrom@linux.intel.com (mailing list archive)
State New, archived

Commit Message

Thomas Hellstrom Oct. 15, 2024, 11:13 a.m. UTC
Introduce a way for hmm_range_fault() and migrate_vma_setup() to identify
foreign devices with a fast interconnect, and thereby allow both direct
access over the interconnect and p2p migration.

The need for a callback arises because without it, the p2p ability would
need to be static and determined at dev_pagemap creation time. With
a callback it can be determined dynamically, and in the migrate case
the callback could separate out local device pages.

The hmm_range_fault() change has been tested internally; the
migrate_vma_setup() change hasn't been tested yet.

Seeking early feedback. Any suggestions appreciated.

Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Simona Vetter <simona.vetter@ffwll.ch>
Cc: DRI-devel <dri-devel@lists.freedesktop.org>
Cc: Linux Memory Management List <linux-mm@kvack.org>
Cc: LKML <linux-kernel@vger.kernel.org>

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 include/linux/hmm.h     |  2 ++
 include/linux/migrate.h | 29 +++++++++++++++++++++++++++++
 mm/hmm.c                | 13 +++++++++++--
 mm/migrate_device.c     | 12 ++++++++++++
 4 files changed, 54 insertions(+), 2 deletions(-)
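
As a concrete illustration of the intended use, below is a minimal,
hypothetical driver-side sketch for the hmm_range_fault() path. Everything
prefixed with xdev_ (the device struct and the interconnect query) is
invented for illustration; only struct p2p_allow, struct p2p_allow_ops and
the new hmm_range::p2p field come from this patch.

/* Hypothetical driver code; xdev and xdev_has_fast_link() are stand-ins. */
struct xdev_p2p_allow {
	struct p2p_allow base;	/* embedded, as the RFC intends */
	struct xdev *local;	/* the device performing the faults */
};

static bool xdev_p2p_allow(struct p2p_allow *allow, struct page *page)
{
	struct xdev_p2p_allow *xa =
		container_of(allow, struct xdev_p2p_allow, base);

	/* Allow direct access only over a fast interconnect. */
	return xdev_has_fast_link(xa->local, page->pgmap->owner);
}

static const struct p2p_allow_ops xdev_p2p_allow_ops = {
	.p2p_allow = xdev_p2p_allow,
};

At fault time the driver would then fill in the range as usual and hook up
the callback:

	struct xdev_p2p_allow xa = {
		.base.ops = &xdev_p2p_allow_ops,
		.local = xdev,
	};
	struct hmm_range range = {
		/* ... notifier, start, end, hmm_pfns ... */
		.dev_private_owner = xdev,
		.p2p = &xa.base,
	};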

Comments

Thomas Hellstrom Oct. 15, 2024, 11:21 a.m. UTC | #1
On Tue, 2024-10-15 at 13:13 +0200, Thomas Hellström wrote:
> Introduce a way for hmm_range_fault() and migrate_vma_setup() to
> identify
> foreign devices with fast interconnect and thereby allow
> both direct access over the interconnect and p2p migration.
> 
> The need for a callback arises because without it, the p2p ability
> would
> need to be static and determined at dev_pagemap creation time. With
> a callback it can be determined dynamically, and in the migrate case
> the callback could separate out local device pages.
> 
> The hmm_range_fault() change has been tested internally, the
> migrate_vma_setup() change hasn't yet.
> 
> Seeking early feedback. Any suggestions appreciated.
> 
> Cc: Matthew Brost <matthew.brost@intel.com>
> Cc: Jason Gunthorpe <jgg@nvidia.com>
> Cc: Simona Vetter <simona.vetter@ffwll.ch>
> Cc: DRI-devel <dri-devel@lists.freedesktop.org>
> Cc: Linux Memory Management List <linux-mm@kvack.org>
> Cc: LKML <linux-kernel@vger.kernel.org>
> 
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
>  include/linux/hmm.h     |  2 ++
>  include/linux/migrate.h | 29 +++++++++++++++++++++++++++++
>  mm/hmm.c                | 13 +++++++++++--
>  mm/migrate_device.c     | 12 ++++++++++++
>  4 files changed, 54 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/hmm.h b/include/linux/hmm.h
> index 126a36571667..4de909a1e10a 100644
> --- a/include/linux/hmm.h
> +++ b/include/linux/hmm.h
> @@ -12,6 +12,7 @@
>  #include <linux/mm.h>
>  
>  struct mmu_interval_notifier;
> +struct p2p_allow;
>  
>  /*
>   * On output:
> @@ -97,6 +98,7 @@ struct hmm_range {
>  	unsigned long		default_flags;
>  	unsigned long		pfn_flags_mask;
>  	void			*dev_private_owner;
> +	struct p2p_allow        *p2p;
>  };
>  
>  /*
> diff --git a/include/linux/migrate.h b/include/linux/migrate.h
> index 002e49b2ebd9..0ff085b633e3 100644
> --- a/include/linux/migrate.h
> +++ b/include/linux/migrate.h
> @@ -183,10 +183,37 @@ static inline unsigned long migrate_pfn(unsigned long pfn)
>  	return (pfn << MIGRATE_PFN_SHIFT) | MIGRATE_PFN_VALID;
>  }
>  
> +struct p2p_allow;
> +
> +/**
> + * struct p2p_allow_ops - Functions for detailed cross-device access.
> + */
> +struct p2p_allow_ops {
> +	/**
> +	 * @p2p_allow: Whether to allow cross-device access to device_private pages.
> +	 * @allow: Pointer to a struct p2p_allow. Typically subclassed by the caller
> +	 * to provide needed information.
> +	 * @page: The page being queried.
> +	 */
> +	bool (*p2p_allow)(struct p2p_allow *allow, struct page *page);
> +};
> +
> +/**
> + * struct p2p_allow - Information needed to allow cross-device access.
> + * @ops: Pointer to a struct p2p_allow_ops.
> + *
> + * This struct is intended to be embedded / subclassed to provide additional
> + * information needed by the @ops p2p_allow() callback.
> + */
> +struct p2p_allow {
> +	const struct p2p_allow_ops *ops;
> +};
> +
>  enum migrate_vma_direction {
>  	MIGRATE_VMA_SELECT_SYSTEM = 1 << 0,
>  	MIGRATE_VMA_SELECT_DEVICE_PRIVATE = 1 << 1,
>  	MIGRATE_VMA_SELECT_DEVICE_COHERENT = 1 << 2,
> +	MIGRATE_VMA_SELECT_DEVICE_P2P = 1 << 3,
>  };
>  
>  struct migrate_vma {
> @@ -222,6 +249,8 @@ struct migrate_vma {
>  	 * a migrate_to_ram() callback.
>  	 */
>  	struct page		*fault_page;
> +	/* Optional identification of devices for p2p migration */
> +	struct p2p_allow        *p2p;
>  };
>  
>  int migrate_vma_setup(struct migrate_vma *args);
> diff --git a/mm/hmm.c b/mm/hmm.c
> index 7e0229ae4a5a..8c28f9b22ed2 100644
> --- a/mm/hmm.c
> +++ b/mm/hmm.c
> @@ -19,6 +19,7 @@
>  #include <linux/pagemap.h>
>  #include <linux/swapops.h>
>  #include <linux/hugetlb.h>
> +#include <linux/migrate.h>
>  #include <linux/memremap.h>
>  #include <linux/sched/mm.h>
>  #include <linux/jump_label.h>
> @@ -220,6 +221,15 @@ static inline unsigned long pte_to_hmm_pfn_flags(struct hmm_range *range,
>  	return pte_write(pte) ? (HMM_PFN_VALID | HMM_PFN_WRITE) : HMM_PFN_VALID;
>  }
>  
> +static bool hmm_allow_devmem(struct hmm_range *range, struct page *page)
> +{
> +	if (likely(page->pgmap->owner == range->dev_private_owner))
> +		return true;
> +	if (likely(!range->p2p))
> +		return false;
> +	return range->p2p->ops->p2p_allow(range->p2p, page);
> +}
> +
>  static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
>  			      unsigned long end, pmd_t *pmdp, pte_t *ptep,
>  			      unsigned long *hmm_pfn)
> @@ -248,8 +258,7 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
>  		 * just report the PFN.
>  		 */
>  		if (is_device_private_entry(entry) &&
> -		    pfn_swap_entry_to_page(entry)->pgmap->owner ==
> -		    range->dev_private_owner) {
> +		    hmm_allow_devmem(range, pfn_swap_entry_to_page(entry))) {
>  			cpu_flags = HMM_PFN_VALID;
>  			if (is_writable_device_private_entry(entry))
>  				cpu_flags |= HMM_PFN_WRITE;
> diff --git a/mm/migrate_device.c b/mm/migrate_device.c
> index 9cf26592ac93..8e643a3872c9 100644
> --- a/mm/migrate_device.c
> +++ b/mm/migrate_device.c
> @@ -54,6 +54,13 @@ static int migrate_vma_collect_hole(unsigned long start,
>  	return 0;
>  }
>  
> +static bool migrate_vma_allow_p2p(struct migrate_vma *migrate, struct page *page)
> +{
> +	if (likely(!migrate->p2p))
> +		return false;
> +	return migrate->p2p->ops->p2p_allow(migrate->p2p, page);
> +}
> +
>  static int migrate_vma_collect_pmd(pmd_t *pmdp,
>  				   unsigned long start,
>  				   unsigned long end,
> @@ -138,6 +145,11 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
>  			    page->pgmap->owner != migrate->pgmap_owner)
>  				goto next;
>  
> +			if (!(migrate->flags &
> +			      MIGRATE_VMA_SELECT_DEVICE_P2P) ||
> +			    !migrate_vma_allow_p2p(migrate, page))
> +				goto next;
> +

And obviously some inverted logic here, sigh, but hopefully the intent
is clear.

/Thomas


>  			mpfn = migrate_pfn(page_to_pfn(page)) |
>  					MIGRATE_PFN_MIGRATE;
>  			if (is_writable_device_private_entry(entry))
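
For reference, a sketch of what the check was presumably meant to be,
assuming the intent is to accept a device-private page when either the
owner matches or the p2p callback admits it (a hedged reading of the
intent, not part of the posted patch):

			if (!(migrate->flags &
			      MIGRATE_VMA_SELECT_DEVICE_PRIVATE) ||
			    page->pgmap->owner != migrate->pgmap_owner) {
				if (!(migrate->flags &
				      MIGRATE_VMA_SELECT_DEVICE_P2P) ||
				    !migrate_vma_allow_p2p(migrate, page))
					goto next;
			}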
Jason Gunthorpe Oct. 15, 2024, 12:17 p.m. UTC | #2
On Tue, Oct 15, 2024 at 01:13:22PM +0200, Thomas Hellström wrote:
> Introduce a way for hmm_range_fault() and migrate_vma_setup() to identify
> foreign devices with fast interconnect and thereby allow
> both direct access over the interconnect and p2p migration.
> 
> The need for a callback arises because without it, the p2p ability would
> need to be static and determined at dev_pagemap creation time. With
> a callback it can be determined dynamically, and in the migrate case
> the callback could separate out local device pages.


> +static bool hmm_allow_devmem(struct hmm_range *range, struct page *page)
> +{
> +	if (likely(page->pgmap->owner == range->dev_private_owner))
> +		return true;
> +	if (likely(!range->p2p))
> +		return false;
> +	return range->p2p->ops->p2p_allow(range->p2p, page);
> +}
> +
>  static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
>  			      unsigned long end, pmd_t *pmdp, pte_t *ptep,
>  			      unsigned long *hmm_pfn)
> @@ -248,8 +258,7 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
>  		 * just report the PFN.
>  		 */
>  		if (is_device_private_entry(entry) &&
> -		    pfn_swap_entry_to_page(entry)->pgmap->owner ==
> -		    range->dev_private_owner) {
> +		    hmm_allow_devmem(range, pfn_swap_entry_to_page(entry))) {
>  			cpu_flags = HMM_PFN_VALID;
>  			if (is_writable_device_private_entry(entry))
>  				cpu_flags |= HMM_PFN_WRITE;

This is really misnamed and took me a while to get it.

It has nothing to do with kernel P2P, you are just allowing more
selective filtering of dev_private_owner. You should focus on that in
the naming, not p2p. ie allow_dev_private()

P2P is stuff that is dealing with MEMORY_DEVICE_PCI_P2PDMA.

This is just allowing more instances of the same driver to co-ordinate
their device private memory handle, for whatever purpose.

Otherwise I don't see a particular problem, though we have talked
about widening the matching for device_private more broadly using some
kind of grouping tag or something like that instead of a callback. You
may consider that as an alternative.

I would also probably try to have less indirection: you can embed the
hmm_range struct inside a caller-private data struct and use that
instead of inventing a whole new struct and pointer.

Jason
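
A minimal sketch of the embedding Jason suggests, assuming the core calls
back with the hmm_range itself so the driver can recover its private data
via container_of(); the xdev names are invented, and this saves one pointer
dereference compared with range->p2p->ops->p2p_allow():

struct xdev_range {
	struct hmm_range range;		/* embedded rather than pointed-to */
	struct xdev *local;		/* hypothetical driver device */
};

static bool xdev_allow_devmem(struct hmm_range *range, struct page *page)
{
	struct xdev_range *xr = container_of(range, struct xdev_range, range);

	if (page->pgmap->owner == range->dev_private_owner)
		return true;
	/* xdev_has_fast_link() is a made-up interconnect query. */
	return xdev_has_fast_link(xr->local, page->pgmap->owner);
}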
Thomas Hellstrom Oct. 15, 2024, 12:41 p.m. UTC | #3
Hi, Jason.

Thanks for the feedback.

On Tue, 2024-10-15 at 09:17 -0300, Jason Gunthorpe wrote:
> On Tue, Oct 15, 2024 at 01:13:22PM +0200, Thomas Hellström wrote:
> > Introduce a way for hmm_range_fault() and migrate_vma_setup() to
> > identify
> > foreign devices with fast interconnect and thereby allow
> > both direct access over the interconnect and p2p migration.
> > 
> > The need for a callback arises because without it, the p2p ability
> > would
> > need to be static and determined at dev_pagemap creation time. With
> > a callback it can be determined dynamically, and in the migrate
> > case
> > the callback could separate out local device pages.
> 
> 
> > +static bool hmm_allow_devmem(struct hmm_range *range, struct page *page)
> > +{
> > +	if (likely(page->pgmap->owner == range->dev_private_owner))
> > +		return true;
> > +	if (likely(!range->p2p))
> > +		return false;
> > +	return range->p2p->ops->p2p_allow(range->p2p, page);
> > +}
> > +
> >  static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
> >  			      unsigned long end, pmd_t *pmdp, pte_t *ptep,
> >  			      unsigned long *hmm_pfn)
> > @@ -248,8 +258,7 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
> >  		 * just report the PFN.
> >  		 */
> >  		if (is_device_private_entry(entry) &&
> > -		    pfn_swap_entry_to_page(entry)->pgmap->owner ==
> > -		    range->dev_private_owner) {
> > +		    hmm_allow_devmem(range, pfn_swap_entry_to_page(entry))) {
> >  			cpu_flags = HMM_PFN_VALID;
> >  			if (is_writable_device_private_entry(entry))
> >  				cpu_flags |= HMM_PFN_WRITE;
> 
> This is really misnamed and took me a while to get it.
> 
> It has nothing to do with kernel P2P, you are just allowing more
> selective filtering of dev_private_owner. You should focus on that in
> the naming, not p2p. ie allow_dev_private()
> 
> P2P is stuff that is dealing with MEMORY_DEVICE_PCI_P2PDMA.

Yes, although the intention was to incorporate also other fast
interconnects in "P2P", not just "PCIe P2P", but I'll definitely take a
look at the naming.

> 
> This is just allowing more instances of the same driver to co-ordinate
> their device private memory handle, for whatever purpose.

Exactly, or theoretically even cross-driver.

> 
> Otherwise I don't see a particular problem, though we have talked
> about widening the matching for device_private more broadly using some
> kind of grouping tag or something like that instead of a callback. You
> may consider that as an alternative.

Yes. Looked at that, but (if I understand you correctly) that would be
the case mentioned in the commit message where the group would be set
up statically at dev_pagemap creation time? 

> 
> I would also probably try to have less indirection: you can embed the
> hmm_range struct inside a caller-private data struct and use that
> instead of inventing a whole new struct and pointer.

Our first attempt was based on that but then that wouldn't be reusable
in the migrate_device.c code. Hence the extra indirection.

Thanks,
Thomas


> 
> Jason
Jason Gunthorpe Oct. 15, 2024, 1:02 p.m. UTC | #4
On Tue, Oct 15, 2024 at 02:41:24PM +0200, Thomas Hellström wrote:
> > It has nothing to do with kernel P2P, you are just allowing more
> > selective filtering of dev_private_owner. You should focus on that in
> > the naming, not p2p. ie allow_dev_private()
> > 
> > P2P is stuff that is dealing with MEMORY_DEVICE_PCI_P2PDMA.
> 
> Yes, although the intention was to incorporate also other fast
> interconnects in "P2P", not just "PCIe P2P", but I'll definitely take a
> look at the naming.

It has nothing to do with that, you are just filtering the device
private pages differently than default.

Your end use might be P2P, but at this API level it certainly is not.

> > > This is just allowing more instances of the same driver to co-ordinate
> > > their device private memory handle, for whatever purpose.
> 
> Exactly, or theoretically even cross-driver.

I don't want to see things like drivers changing their pgmap handles
privately somehow. If we are going to make it cross-driver then it
needs to be generalized a lot more.

> > 
> > > Otherwise I don't see a particular problem, though we have talked
> > > about widening the matching for device_private more broadly using some
> > > kind of grouping tag or something like that instead of a callback. You
> > > may consider that as an alternative.
> 
> Yes. Looked at that, but (if I understand you correctly) that would be
> the case mentioned in the commit message where the group would be set
> up statically at dev_pagemap creation time?

Not necessarily statically, but the membership would be stored in the
pagemap and be updated during hotplug/etc.

If this is for P2P then the dynamic behavior is pretty limited, some
kind of NxN bitmap.
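
A hedged sketch of what such a tag/bitmap scheme might look like; the
peer-id notion and every name below are invented here, and dev_pagemap
carries no such field today:

#define P2P_MAX_PEERS	64	/* arbitrary cap for illustration */

/* Bit b set in row a means pagemaps with peer id a may access peer b. */
static DECLARE_BITMAP(p2p_peer_allowed[P2P_MAX_PEERS], P2P_MAX_PEERS);

/* Rows updated under a lock at device hotplug/unplug; checked locklessly. */
static bool p2p_peers_may_access(unsigned int local_id, unsigned int owner_id)
{
	return test_bit(owner_id, p2p_peer_allowed[local_id]);
}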

> > hmm_range struct inside a caller private data struct and use that
> > instead if inventing a whole new struct and pointer.
> 
> Our first attempt was based on that but then that wouldn't be reusable
> in the migrate_device.c code. Hence the extra indirection.

It is a performance path, so you should prefer duplication rather than
slowing it down.

Jason
Thomas Hellstrom Oct. 15, 2024, 1:17 p.m. UTC | #5
On Tue, 2024-10-15 at 10:02 -0300, Jason Gunthorpe wrote:
> On Tue, Oct 15, 2024 at 02:41:24PM +0200, Thomas Hellström wrote:
> > > It has nothing to do with kernel P2P, you are just allowing more
> > > selective filtering of dev_private_owner. You should focus on that in
> > > the naming, not p2p. ie allow_dev_private()
> > > 
> > > P2P is stuff that is dealing with MEMORY_DEVICE_PCI_P2PDMA.
> > 
> > Yes, although the intention was to incorporate also other fast
> > interconnects in "P2P", not just "PCIe P2P", but I'll definitely take a
> > look at the naming.
> 
> It has nothing to do with that, you are just filtering the device
> private pages differently than default.
> 
> Your end use might be P2P, but at this API level it certainly is not.

Sure. Will find something more suitable.

> 
> > > This is just allowing more instances of the same driver to co-ordinate
> > > their device private memory handle, for whatever purpose.
> > 
> > Exactly, or theoretically even cross-driver.
> 
> I don't want to see things like drivers changing their pgmap handles
> privately somehow. If we are going to make it cross-driver then it
> needs to be generalized a lot more.

Cross-driver is initially not a thing, so let's worry about that later.
My impression though is that this is the only change required for
hmm_range_fault() and that infrastructure for opt-in and dma-mapping
would need to be provided elsewhere?

> 
> > > 
> > > Otherwise I don't see a particular problem, though we have talked
> > > about widening the matching for device_private more broadly using some
> > > kind of grouping tag or something like that instead of a callback. You
> > > may consider that as an alternative.
> > 
> > Yes. Looked at that, but (if I understand you correctly) that would be
> > the case mentioned in the commit message where the group would be set
> > up statically at dev_pagemap creation time?
> 
> Not necessarily statically, but the membership would be stored in the
> pagemap and be updated during hotplug/etc.
> 
> If this is for P2P then the dynamic behavior is pretty limited, some
> kind of NxN bitmap.
> 
> > > hmm_range struct inside a caller-private data struct and use that
> > > instead of inventing a whole new struct and pointer.
> > 
> > Our first attempt was based on that but then that wouldn't be reusable
> > in the migrate_device.c code. Hence the extra indirection.
> 
> It is a performance path, so you should prefer duplication rather than
> slowing it down.

OK. Will look at duplicating.

Thanks,
Thomas


> 
> Jason
Alistair Popple Oct. 16, 2024, 4:46 a.m. UTC | #6
Thomas Hellström <thomas.hellstrom@linux.intel.com> writes:

> On Tue, 2024-10-15 at 10:02 -0300, Jason Gunthorpe wrote:
>> On Tue, Oct 15, 2024 at 02:41:24PM +0200, Thomas Hellström wrote:
>> > > It has nothing to do with kernel P2P, you are just allowing more
>> > > selective filtering of dev_private_owner. You should focus on that in
>> > > the naming, not p2p. ie allow_dev_private()
>> > > 
>> > > P2P is stuff that is dealing with MEMORY_DEVICE_PCI_P2PDMA.
>> > 
>> > Yes, although the intention was to incorporate also other fast
>> > interconnects in "P2P", not just "PCIe P2P", but I'll definitely take a
>> > look at the naming.
>> 
>> It has nothing to do with that, you are just filtering the device
>> private pages differently than default.
>> 
>> Your end use might be P2P, but at this API level it certainly is not.
>
> Sure. Will find something more suitable.
>
>> 
>> > > This is just allowing more instances of the same driver to co-ordinate
>> > > their device private memory handle, for whatever purpose.
>> > 
>> > Exactly, or theoretically even cross-driver.
>> 
>> I don't want to see things like drivers changing their pgmap handles
>> privately somehow. If we are going to make it cross-driver then it
>> needs to be generalized a lot more.
>
> Cross-driver is initially not a thing, so let's worry about that later.
> My impression though is that this is the only change required for
> hmm_range_fault() and that infrastructure for opt-in and dma-mapping
> would need to be provided elsewhere?

Cross-driver is tricky because the device-private pages have no meaning
outside of the driver which owns/allocates them. One option is to have a
callback which returns P2PDMA pages which can then be dma-mapped. See
https://lore.kernel.org/linux-mm/20241015152348.3055360-1-ymaman@nvidia.com/
for an example of that.
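
The rough shape of that idea, with every name below invented here (see the
linked series for the real interface): the owning driver translates its
device-private page into a P2PDMA page the importing device can dma-map.

/* Purely illustrative; not the API from the linked series. */
static struct page *
xdev_devmem_to_p2pdma(struct page *private_page, struct device *importer)
{
	struct xdev *owner = xdev_from_pgmap(private_page->pgmap);

	/* Both helpers are hypothetical driver internals. */
	if (!xdev_peer_reachable(owner, importer))
		return NULL;
	return xdev_lookup_p2pdma_page(owner, private_page);
}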

>> 
>> > > 
>> > > Otherwise I don't see a particular problem, though we have talked
>> > > about widening the matching for device_private more broadly using some
>> > > kind of grouping tag or something like that instead of a callback. You
>> > > may consider that as an alternative.
>> > 
>> > Yes. Looked at that, but (if I understand you correctly) that would be
>> > the case mentioned in the commit message where the group would be set
>> > up statically at dev_pagemap creation time?
>> 
>> Not necessarily statically, but the membership would be stored in the
>> pagemap and be updated during hotplug/etc.
>> 
>> If this is for P2P then the dynamic behavior is pretty limited, some
>> kind of NxN bitmap.
>> 
>> > > hmm_range struct inside a caller-private data struct and use that
>> > > instead of inventing a whole new struct and pointer.
>> > 
>> > Our first attempt was based on that but then that wouldn't be reusable
>> > in the migrate_device.c code. Hence the extra indirection.
>> 
>> It is a performance path, so you should prefer duplication rather than
>> slowing it down.
>
> OK. Will look at duplicating.
>
> Thanks,
> Thomas
>
>
>> 
>> Jason

Patch

diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 126a36571667..4de909a1e10a 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -12,6 +12,7 @@ 
 #include <linux/mm.h>
 
 struct mmu_interval_notifier;
+struct p2p_allow;
 
 /*
  * On output:
@@ -97,6 +98,7 @@  struct hmm_range {
 	unsigned long		default_flags;
 	unsigned long		pfn_flags_mask;
 	void			*dev_private_owner;
+	struct p2p_allow        *p2p;
 };
 
 /*
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 002e49b2ebd9..0ff085b633e3 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -183,10 +183,37 @@  static inline unsigned long migrate_pfn(unsigned long pfn)
 	return (pfn << MIGRATE_PFN_SHIFT) | MIGRATE_PFN_VALID;
 }
 
+struct p2p_allow;
+
+/**
+ * struct p2p_allow_ops - Functions for detailed cross-device access.
+ */
+struct p2p_allow_ops {
+	/**
+	 * @p2p_allow: Whether to allow cross-device access to device_private pages.
+	 * @allow: Pointer to a struct p2p_allow. Typically subclassed by the caller
+	 * to provide needed information.
+	 * @page: The page being queried.
+	 */
+	bool (*p2p_allow)(struct p2p_allow *allow, struct page *page);
+};
+
+/**
+ * struct p2p_allow - Information needed to allow cross-device access.
+ * @ops: Pointer to a struct p2p_allow_ops.
+ *
+ * This struct is intended to be embedded / subclassed to provide additional
+ * information needed by the @ops p2p_allow() callback.
+ */
+struct p2p_allow {
+	const struct p2p_allow_ops *ops;
+};
+
 enum migrate_vma_direction {
 	MIGRATE_VMA_SELECT_SYSTEM = 1 << 0,
 	MIGRATE_VMA_SELECT_DEVICE_PRIVATE = 1 << 1,
 	MIGRATE_VMA_SELECT_DEVICE_COHERENT = 1 << 2,
+	MIGRATE_VMA_SELECT_DEVICE_P2P = 1 << 3,
 };
 
 struct migrate_vma {
@@ -222,6 +249,8 @@  struct migrate_vma {
 	 * a migrate_to_ram() callback.
 	 */
 	struct page		*fault_page;
+	/* Optional identification of devices for p2p migration */
+	struct p2p_allow        *p2p;
 };
 
 int migrate_vma_setup(struct migrate_vma *args);
diff --git a/mm/hmm.c b/mm/hmm.c
index 7e0229ae4a5a..8c28f9b22ed2 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -19,6 +19,7 @@ 
 #include <linux/pagemap.h>
 #include <linux/swapops.h>
 #include <linux/hugetlb.h>
+#include <linux/migrate.h>
 #include <linux/memremap.h>
 #include <linux/sched/mm.h>
 #include <linux/jump_label.h>
@@ -220,6 +221,15 @@  static inline unsigned long pte_to_hmm_pfn_flags(struct hmm_range *range,
 	return pte_write(pte) ? (HMM_PFN_VALID | HMM_PFN_WRITE) : HMM_PFN_VALID;
 }
 
+static bool hmm_allow_devmem(struct hmm_range *range, struct page *page)
+{
+	if (likely(page->pgmap->owner == range->dev_private_owner))
+		return true;
+	if (likely(!range->p2p))
+		return false;
+	return range->p2p->ops->p2p_allow(range->p2p, page);
+}
+
 static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
 			      unsigned long end, pmd_t *pmdp, pte_t *ptep,
 			      unsigned long *hmm_pfn)
@@ -248,8 +258,7 @@  static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
 		 * just report the PFN.
 		 */
 		if (is_device_private_entry(entry) &&
-		    pfn_swap_entry_to_page(entry)->pgmap->owner ==
-		    range->dev_private_owner) {
+		    hmm_allow_devmem(range, pfn_swap_entry_to_page(entry))) {
 			cpu_flags = HMM_PFN_VALID;
 			if (is_writable_device_private_entry(entry))
 				cpu_flags |= HMM_PFN_WRITE;
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index 9cf26592ac93..8e643a3872c9 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -54,6 +54,13 @@  static int migrate_vma_collect_hole(unsigned long start,
 	return 0;
 }
 
+static bool migrate_vma_allow_p2p(struct migrate_vma *migrate, struct page *page)
+{
+	if (likely(!migrate->p2p))
+		return false;
+	return migrate->p2p->ops->p2p_allow(migrate->p2p, page);
+}
+
 static int migrate_vma_collect_pmd(pmd_t *pmdp,
 				   unsigned long start,
 				   unsigned long end,
@@ -138,6 +145,11 @@  static int migrate_vma_collect_pmd(pmd_t *pmdp,
 			    page->pgmap->owner != migrate->pgmap_owner)
 				goto next;
 
+			if (!(migrate->flags &
+			      MIGRATE_VMA_SELECT_DEVICE_P2P) ||
+			    !migrate_vma_allow_p2p(migrate, page))
+				goto next;
+
 			mpfn = migrate_pfn(page_to_pfn(page)) |
 					MIGRATE_PFN_MIGRATE;
 			if (is_writable_device_private_entry(entry))
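
For completeness, a hypothetical caller-side sketch of the migrate path as
proposed, reusing the xdev_p2p_allow_ops sketched earlier (invented names;
note the collect_pmd selection above still has the inverted logic Thomas
flagged in comment #1):

	struct xdev_p2p_allow xa = {
		.base.ops = &xdev_p2p_allow_ops,
		.local = xdev,
	};
	struct migrate_vma args = {
		.vma		= vma,
		.start		= start,
		.end		= end,
		.src		= src_pfns,
		.dst		= dst_pfns,
		.pgmap_owner	= xdev,
		.flags		= MIGRATE_VMA_SELECT_DEVICE_PRIVATE |
				  MIGRATE_VMA_SELECT_DEVICE_P2P,
		.p2p		= &xa.base,
	};

	ret = migrate_vma_setup(&args);
	if (!ret && args.cpages) {
		/* Allocate and copy destination pages, then: */
		migrate_vma_pages(&args);
		migrate_vma_finalize(&args);
	}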