diff mbox series

[02/13] pci/p2pdma: Don't initialise page refcount to one

Message ID c66cc5c5142813049ffdf9af75302f5064048241.1719386613.git-series.apopple@nvidia.com
State Superseded
Headers show
Series fs/dax: Fix FS DAX page reference counts | expand

Commit Message

Alistair Popple June 27, 2024, 12:54 a.m. UTC
The reference counts for ZONE_DEVICE private pages should be
initialised by the driver when the page is actually allocated by the
driver allocator, not when they are first created. This is currently
the case for MEMORY_DEVICE_PRIVATE and MEMORY_DEVICE_COHERENT pages
but not MEMORY_DEVICE_PCI_P2PDMA pages so fix that up.

Signed-off-by: Alistair Popple <apopple@nvidia.com>
---
 drivers/pci/p2pdma.c | 2 ++
 mm/memremap.c        | 8 ++++----
 mm/mm_init.c         | 4 +++-
 3 files changed, 9 insertions(+), 5 deletions(-)

Comments

Christoph Hellwig June 27, 2024, 5:30 a.m. UTC | #1
On Thu, Jun 27, 2024 at 10:54:17AM +1000, Alistair Popple wrote:
> The reference counts for ZONE_DEVICE private pages should be
> initialised by the driver when the page is actually allocated by the
> driver allocator, not when they are first created. This is currently
> the case for MEMORY_DEVICE_PRIVATE and MEMORY_DEVICE_COHERENT pages
> but not MEMORY_DEVICE_PCI_P2PDMA pages so fix that up.
> 
> Signed-off-by: Alistair Popple <apopple@nvidia.com>
> ---
>  drivers/pci/p2pdma.c | 2 ++
>  mm/memremap.c        | 8 ++++----
>  mm/mm_init.c         | 4 +++-
>  3 files changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
> index 4f47a13..1e9ea32 100644
> --- a/drivers/pci/p2pdma.c
> +++ b/drivers/pci/p2pdma.c
> @@ -128,6 +128,8 @@ static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj,
>  		goto out;
>  	}
>  
> +	set_page_count(virt_to_page(kaddr), 1);

Can we have a comment here?  Without that it feels a bit too much like
black magic when reading the code.

> +	if (folio->page.pgmap->type == MEMORY_DEVICE_PRIVATE ||
> +	    folio->page.pgmap->type == MEMORY_DEVICE_COHERENT)
> +		put_dev_pagemap(folio->page.pgmap);
> +	else if (folio->page.pgmap->type != MEMORY_DEVICE_PCI_P2PDMA)
>  		/*
>  		 * Reset the refcount to 1 to prepare for handing out the page
>  		 * again.
>  		 */
>  		folio_set_count(folio, 1);

Where the else if evaluates to MEMORY_DEVICE_FS_DAX ||
MEMORY_DEVICE_GENERIC.  Maybe make this a switch statement handling
all cases of the enum to make it clear and have the compiler generate
a warning when a new type is added without being handled here?

> @@ -1014,7 +1015,8 @@ static void __ref __init_zone_device_page(struct page *page, unsigned long pfn,
>  	 * which will set the page count to 1 when allocating the page.
>  	 */
>  	if (pgmap->type == MEMORY_DEVICE_PRIVATE ||
> +	    pgmap->type == MEMORY_DEVICE_COHERENT ||
> +	    pgmap->type == MEMORY_DEVICE_PCI_P2PDMA)
>  		set_page_count(page, 0);

Similarly here a switch with explanation of what will be handled and
what not would be nice.
Bjorn Helgaas June 29, 2024, 9:28 p.m. UTC | #2
On Thu, Jun 27, 2024 at 10:54:17AM +1000, Alistair Popple wrote:
> The reference counts for ZONE_DEVICE private pages should be
> initialised by the driver when the page is actually allocated by the
> driver allocator, not when they are first created. This is currently
> the case for MEMORY_DEVICE_PRIVATE and MEMORY_DEVICE_COHERENT pages
> but not MEMORY_DEVICE_PCI_P2PDMA pages so fix that up.

If you tag the subject line with PCI, please run "git log --oneline
drivers/pci/p2pdma.c" and make yours look like previous ones
("PCI/P2PDMA").

Also recast it to say something semantically useful about what it
*does*, not what it *doesn't* do.  Maybe something about initializing
the refcount where the page is allocated?  Especially since the only
p2pdma.c change here is to "set_page_count(..., 1)", which looks like
exactly the opposite of "don't initialize refcount to one".

> Signed-off-by: Alistair Popple <apopple@nvidia.com>
> ---
>  drivers/pci/p2pdma.c | 2 ++
>  mm/memremap.c        | 8 ++++----
>  mm/mm_init.c         | 4 +++-
>  3 files changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
> index 4f47a13..1e9ea32 100644
> --- a/drivers/pci/p2pdma.c
> +++ b/drivers/pci/p2pdma.c
> @@ -128,6 +128,8 @@ static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj,
>  		goto out;
>  	}
>  
> +	set_page_count(virt_to_page(kaddr), 1);
> +
>  	/*
>  	 * vm_insert_page() can sleep, so a reference is taken to mapping
>  	 * such that rcu_read_unlock() can be done before inserting the
> diff --git a/mm/memremap.c b/mm/memremap.c
> index 40d4547..caccbd8 100644
> --- a/mm/memremap.c
> +++ b/mm/memremap.c
> @@ -488,15 +488,15 @@ void free_zone_device_folio(struct folio *folio)
>  	folio->mapping = NULL;
>  	folio->page.pgmap->ops->page_free(folio_page(folio, 0));
>  
> -	if (folio->page.pgmap->type != MEMORY_DEVICE_PRIVATE &&
> -	    folio->page.pgmap->type != MEMORY_DEVICE_COHERENT)
> +	if (folio->page.pgmap->type == MEMORY_DEVICE_PRIVATE ||
> +	    folio->page.pgmap->type == MEMORY_DEVICE_COHERENT)
> +		put_dev_pagemap(folio->page.pgmap);
> +	else if (folio->page.pgmap->type != MEMORY_DEVICE_PCI_P2PDMA)
>  		/*
>  		 * Reset the refcount to 1 to prepare for handing out the page
>  		 * again.
>  		 */
>  		folio_set_count(folio, 1);
> -	else
> -		put_dev_pagemap(folio->page.pgmap);
>  }
>  
>  void zone_device_page_init(struct page *page)
> diff --git a/mm/mm_init.c b/mm/mm_init.c
> index 3ec0493..b7e1599 100644
> --- a/mm/mm_init.c
> +++ b/mm/mm_init.c
> @@ -6,6 +6,7 @@
>   * Author Mel Gorman <mel@csn.ul.ie>
>   *
>   */
> +#include "linux/memremap.h"
>  #include <linux/kernel.h>
>  #include <linux/init.h>
>  #include <linux/kobject.h>
> @@ -1014,7 +1015,8 @@ static void __ref __init_zone_device_page(struct page *page, unsigned long pfn,
>  	 * which will set the page count to 1 when allocating the page.
>  	 */
>  	if (pgmap->type == MEMORY_DEVICE_PRIVATE ||
> -	    pgmap->type == MEMORY_DEVICE_COHERENT)
> +	    pgmap->type == MEMORY_DEVICE_COHERENT ||
> +	    pgmap->type == MEMORY_DEVICE_PCI_P2PDMA)
>  		set_page_count(page, 0);
>  }
>  
> -- 
> git-series 0.9.1
diff mbox series

Patch

diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index 4f47a13..1e9ea32 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -128,6 +128,8 @@  static int p2pmem_alloc_mmap(struct file *filp, struct kobject *kobj,
 		goto out;
 	}
 
+	set_page_count(virt_to_page(kaddr), 1);
+
 	/*
 	 * vm_insert_page() can sleep, so a reference is taken to mapping
 	 * such that rcu_read_unlock() can be done before inserting the
diff --git a/mm/memremap.c b/mm/memremap.c
index 40d4547..caccbd8 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -488,15 +488,15 @@  void free_zone_device_folio(struct folio *folio)
 	folio->mapping = NULL;
 	folio->page.pgmap->ops->page_free(folio_page(folio, 0));
 
-	if (folio->page.pgmap->type != MEMORY_DEVICE_PRIVATE &&
-	    folio->page.pgmap->type != MEMORY_DEVICE_COHERENT)
+	if (folio->page.pgmap->type == MEMORY_DEVICE_PRIVATE ||
+	    folio->page.pgmap->type == MEMORY_DEVICE_COHERENT)
+		put_dev_pagemap(folio->page.pgmap);
+	else if (folio->page.pgmap->type != MEMORY_DEVICE_PCI_P2PDMA)
 		/*
 		 * Reset the refcount to 1 to prepare for handing out the page
 		 * again.
 		 */
 		folio_set_count(folio, 1);
-	else
-		put_dev_pagemap(folio->page.pgmap);
 }
 
 void zone_device_page_init(struct page *page)
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 3ec0493..b7e1599 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -6,6 +6,7 @@ 
  * Author Mel Gorman <mel@csn.ul.ie>
  *
  */
+#include "linux/memremap.h"
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/kobject.h>
@@ -1014,7 +1015,8 @@  static void __ref __init_zone_device_page(struct page *page, unsigned long pfn,
 	 * which will set the page count to 1 when allocating the page.
 	 */
 	if (pgmap->type == MEMORY_DEVICE_PRIVATE ||
-	    pgmap->type == MEMORY_DEVICE_COHERENT)
+	    pgmap->type == MEMORY_DEVICE_COHERENT ||
+	    pgmap->type == MEMORY_DEVICE_PCI_P2PDMA)
 		set_page_count(page, 0);
 }