diff mbox series

[v2,2/2] proc: Add getting pages info of ZONE_DEVICE support

Message ID 20220110141957.259022-3-sxwjean@me.com (mailing list archive)
State New
Headers show
Series Add support for getting page info of ZONE_DEVICE by /proc/kpage* | expand

Commit Message

Xiongwei Song Jan. 10, 2022, 2:19 p.m. UTC
From: Xiongwei Song <sxwjean@gmail.com>

When requesting pages info by /proc/kpage*, the pages in ZONE_DEVICE were
missed.

The pfn_to_devmap_page() function can help to get page that belongs to
ZONE_DEVICE.

Signed-off-by: Xiongwei Song <sxwjean@gmail.com>
---
 fs/proc/page.c | 35 ++++++++++++++++++++++-------------
 1 file changed, 22 insertions(+), 13 deletions(-)

Comments

David Hildenbrand Jan. 10, 2022, 2:34 p.m. UTC | #1
On 10.01.22 15:19, sxwjean@me.com wrote:
> From: Xiongwei Song <sxwjean@gmail.com>
> 
> When requesting pages info by /proc/kpage*, the pages in ZONE_DEVICE were
> missed.
>

The "missed" part makes it sound like this was done by accident. On the
contrary, for now we decided to not expose these pages that way, for
example, because determining if the memmap was already properly
initialized isn't quite easy.


> The pfn_to_devmap_page() function can help to get page that belongs to
> ZONE_DEVICE.

What's the main motivation for this?

> 
> Signed-off-by: Xiongwei Song <sxwjean@gmail.com>
> ---
>  fs/proc/page.c | 35 ++++++++++++++++++++++-------------
>  1 file changed, 22 insertions(+), 13 deletions(-)
> 
> diff --git a/fs/proc/page.c b/fs/proc/page.c
> index 9f1077d94cde..2cdc2b315ff8 100644
> --- a/fs/proc/page.c
> +++ b/fs/proc/page.c
> @@ -15,6 +15,7 @@
>  #include <linux/page_idle.h>
>  #include <linux/kernel-page-flags.h>
>  #include <linux/uaccess.h>
> +#include <linux/memremap.h>
>  #include "internal.h"
>  
>  #define KPMSIZE sizeof(u64)
> @@ -46,6 +47,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
>  {
>  	const unsigned long max_dump_pfn = get_max_dump_pfn();
>  	u64 __user *out = (u64 __user *)buf;
> +	struct dev_pagemap *pgmap = NULL;
>  	struct page *ppage;
>  	unsigned long src = *ppos;
>  	unsigned long pfn;
> @@ -60,17 +62,18 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
>  	count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);
>  
>  	while (count > 0) {
> -		/*
> -		 * TODO: ZONE_DEVICE support requires to identify
> -		 * memmaps that were actually initialized.
> -		 */
>  		ppage = pfn_to_online_page(pfn);
> +		if (!ppage)
> +			ppage = pfn_to_devmap_page(pfn, &pgmap);
>  
>  		if (!ppage || PageSlab(ppage) || page_has_type(ppage))
>  			pcount = 0;
>  		else
>  			pcount = page_mapcount(ppage);
>  
> +		if (pgmap)
> +			put_dev_pagemap(pgmap);

Ehm, don't you have to reset pgmap back to NULL? Otherwise during the
next iteration, you'll see pgmap != NULL again.

> +
>  		if (put_user(pcount, out)) {
>  			ret = -EFAULT;
>  			break;
> @@ -229,10 +232,12 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf,
>  {
>  	const unsigned long max_dump_pfn = get_max_dump_pfn();
>  	u64 __user *out = (u64 __user *)buf;
> +	struct dev_pagemap *pgmap = NULL;
>  	struct page *ppage;
>  	unsigned long src = *ppos;
>  	unsigned long pfn;
>  	ssize_t ret = 0;
> +	u64 flags;
>  
>  	pfn = src / KPMSIZE;
>  	if (src & KPMMASK || count & KPMMASK)
> @@ -242,13 +247,15 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf,
>  	count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);
>  
>  	while (count > 0) {
> -		/*
> -		 * TODO: ZONE_DEVICE support requires to identify
> -		 * memmaps that were actually initialized.
> -		 */
>  		ppage = pfn_to_online_page(pfn);
> +		if (!ppage)
> +			ppage = pfn_to_devmap_page(pfn, &pgmap);
> +
> +		flags = stable_page_flags(ppage);
> +		if (pgmap)
> +			put_dev_pagemap(pgmap);

Similar comment.

>  
> -		if (put_user(stable_page_flags(ppage), out)) {
> +		if (put_user(flags, out)) {
>  			ret = -EFAULT;
>  			break;
>  		}
> @@ -277,6 +284,7 @@ static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
>  {
>  	const unsigned long max_dump_pfn = get_max_dump_pfn();
>  	u64 __user *out = (u64 __user *)buf;
> +	struct dev_pagemap *pgmap = NULL;
>  	struct page *ppage;
>  	unsigned long src = *ppos;
>  	unsigned long pfn;
> @@ -291,17 +299,18 @@ static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
>  	count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);
>  
>  	while (count > 0) {
> -		/*
> -		 * TODO: ZONE_DEVICE support requires to identify
> -		 * memmaps that were actually initialized.
> -		 */
>  		ppage = pfn_to_online_page(pfn);
> +		if (!ppage)
> +			ppage = pfn_to_devmap_page(pfn, &pgmap);
>  
>  		if (ppage)
>  			ino = page_cgroup_ino(ppage);
>  		else
>  			ino = 0;
>  
> +		if (pgmap)
> +			put_dev_pagemap(pgmap);

Similar comment.


IIRC, we might still stumble over uninitialized devmap memmaps that
essentially contain garbage -- I recall it might be the device metadata.
I wonder if we at least have to check pgmap_pfn_valid().
Xiongwei Song Jan. 11, 2022, 7:51 a.m. UTC | #2
Hi David,

On Mon, Jan 10, 2022 at 10:34 PM David Hildenbrand <david@redhat.com> wrote:
>
> On 10.01.22 15:19, sxwjean@me.com wrote:
> > From: Xiongwei Song <sxwjean@gmail.com>
> >
> > When requesting pages info by /proc/kpage*, the pages in ZONE_DEVICE were
> > missed.
> >
>
> The "missed" part makes it sound like this was done by accident. On the
> contrary, for now we decided to not expose these pages that way, for
> example, because determining if the memmap was already properly
> initialized isn't quite easy.
>

Understood. Thank you for the explanation.

>
> > The pfn_to_devmap_page() function can help to get page that belongs to
> > ZONE_DEVICE.
>
> What's the main motivation for this?

There is no special case. My customer wanted to check page flags in system wide.
I tried to find the way and found there is no capability for pages of
ZONE_DEVICE,
so tried to make the patch and see if upstream needs it.

>
> >
> > Signed-off-by: Xiongwei Song <sxwjean@gmail.com>
> > ---
> >  fs/proc/page.c | 35 ++++++++++++++++++++++-------------
> >  1 file changed, 22 insertions(+), 13 deletions(-)
> >
> > diff --git a/fs/proc/page.c b/fs/proc/page.c
> > index 9f1077d94cde..2cdc2b315ff8 100644
> > --- a/fs/proc/page.c
> > +++ b/fs/proc/page.c
> > @@ -15,6 +15,7 @@
> >  #include <linux/page_idle.h>
> >  #include <linux/kernel-page-flags.h>
> >  #include <linux/uaccess.h>
> > +#include <linux/memremap.h>
> >  #include "internal.h"
> >
> >  #define KPMSIZE sizeof(u64)
> > @@ -46,6 +47,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
> >  {
> >       const unsigned long max_dump_pfn = get_max_dump_pfn();
> >       u64 __user *out = (u64 __user *)buf;
> > +     struct dev_pagemap *pgmap = NULL;
> >       struct page *ppage;
> >       unsigned long src = *ppos;
> >       unsigned long pfn;
> > @@ -60,17 +62,18 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
> >       count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);
> >
> >       while (count > 0) {
> > -             /*
> > -              * TODO: ZONE_DEVICE support requires to identify
> > -              * memmaps that were actually initialized.
> > -              */
> >               ppage = pfn_to_online_page(pfn);
> > +             if (!ppage)
> > +                     ppage = pfn_to_devmap_page(pfn, &pgmap);
> >
> >               if (!ppage || PageSlab(ppage) || page_has_type(ppage))
> >                       pcount = 0;
> >               else
> >                       pcount = page_mapcount(ppage);
> >
> > +             if (pgmap)
> > +                     put_dev_pagemap(pgmap);
>
> Ehm, don't you have to reset pgmap back to NULL? Otherwise during the
> next iteration, you'll see pgmap != NULL again.

Oops. I totally agree. Will do this in the next version.

>
> > +
> >               if (put_user(pcount, out)) {
> >                       ret = -EFAULT;
> >                       break;
> > @@ -229,10 +232,12 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf,
> >  {
> >       const unsigned long max_dump_pfn = get_max_dump_pfn();
> >       u64 __user *out = (u64 __user *)buf;
> > +     struct dev_pagemap *pgmap = NULL;
> >       struct page *ppage;
> >       unsigned long src = *ppos;
> >       unsigned long pfn;
> >       ssize_t ret = 0;
> > +     u64 flags;
> >
> >       pfn = src / KPMSIZE;
> >       if (src & KPMMASK || count & KPMMASK)
> > @@ -242,13 +247,15 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf,
> >       count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);
> >
> >       while (count > 0) {
> > -             /*
> > -              * TODO: ZONE_DEVICE support requires to identify
> > -              * memmaps that were actually initialized.
> > -              */
> >               ppage = pfn_to_online_page(pfn);
> > +             if (!ppage)
> > +                     ppage = pfn_to_devmap_page(pfn, &pgmap);
> > +
> > +             flags = stable_page_flags(ppage);
> > +             if (pgmap)
> > +                     put_dev_pagemap(pgmap);
>
> Similar comment.

Okay.

>
> >
> > -             if (put_user(stable_page_flags(ppage), out)) {
> > +             if (put_user(flags, out)) {
> >                       ret = -EFAULT;
> >                       break;
> >               }
> > @@ -277,6 +284,7 @@ static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
> >  {
> >       const unsigned long max_dump_pfn = get_max_dump_pfn();
> >       u64 __user *out = (u64 __user *)buf;
> > +     struct dev_pagemap *pgmap = NULL;
> >       struct page *ppage;
> >       unsigned long src = *ppos;
> >       unsigned long pfn;
> > @@ -291,17 +299,18 @@ static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
> >       count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);
> >
> >       while (count > 0) {
> > -             /*
> > -              * TODO: ZONE_DEVICE support requires to identify
> > -              * memmaps that were actually initialized.
> > -              */
> >               ppage = pfn_to_online_page(pfn);
> > +             if (!ppage)
> > +                     ppage = pfn_to_devmap_page(pfn, &pgmap);
> >
> >               if (ppage)
> >                       ino = page_cgroup_ino(ppage);
> >               else
> >                       ino = 0;
> >
> > +             if (pgmap)
> > +                     put_dev_pagemap(pgmap);
>
> Similar comment.

Okay.

>
>
> IIRC, we might still stumble over uninitialized devmap memmaps that
> essentially contain garbage -- I recall it might be the device metadata.
> I wonder if we at least have to check pgmap_pfn_valid().

Oh, ok.  But how about putting pgmap_pfn_valid into pfn_to_devmap_page()?

Appreciated your review.

Regards,
Xiongwei
diff mbox series

Patch

diff --git a/fs/proc/page.c b/fs/proc/page.c
index 9f1077d94cde..2cdc2b315ff8 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -15,6 +15,7 @@ 
 #include <linux/page_idle.h>
 #include <linux/kernel-page-flags.h>
 #include <linux/uaccess.h>
+#include <linux/memremap.h>
 #include "internal.h"
 
 #define KPMSIZE sizeof(u64)
@@ -46,6 +47,7 @@  static ssize_t kpagecount_read(struct file *file, char __user *buf,
 {
 	const unsigned long max_dump_pfn = get_max_dump_pfn();
 	u64 __user *out = (u64 __user *)buf;
+	struct dev_pagemap *pgmap = NULL;
 	struct page *ppage;
 	unsigned long src = *ppos;
 	unsigned long pfn;
@@ -60,17 +62,18 @@  static ssize_t kpagecount_read(struct file *file, char __user *buf,
 	count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);
 
 	while (count > 0) {
-		/*
-		 * TODO: ZONE_DEVICE support requires to identify
-		 * memmaps that were actually initialized.
-		 */
 		ppage = pfn_to_online_page(pfn);
+		if (!ppage)
+			ppage = pfn_to_devmap_page(pfn, &pgmap);
 
 		if (!ppage || PageSlab(ppage) || page_has_type(ppage))
 			pcount = 0;
 		else
 			pcount = page_mapcount(ppage);
 
+		if (pgmap)
+			put_dev_pagemap(pgmap);
+
 		if (put_user(pcount, out)) {
 			ret = -EFAULT;
 			break;
@@ -229,10 +232,12 @@  static ssize_t kpageflags_read(struct file *file, char __user *buf,
 {
 	const unsigned long max_dump_pfn = get_max_dump_pfn();
 	u64 __user *out = (u64 __user *)buf;
+	struct dev_pagemap *pgmap = NULL;
 	struct page *ppage;
 	unsigned long src = *ppos;
 	unsigned long pfn;
 	ssize_t ret = 0;
+	u64 flags;
 
 	pfn = src / KPMSIZE;
 	if (src & KPMMASK || count & KPMMASK)
@@ -242,13 +247,15 @@  static ssize_t kpageflags_read(struct file *file, char __user *buf,
 	count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);
 
 	while (count > 0) {
-		/*
-		 * TODO: ZONE_DEVICE support requires to identify
-		 * memmaps that were actually initialized.
-		 */
 		ppage = pfn_to_online_page(pfn);
+		if (!ppage)
+			ppage = pfn_to_devmap_page(pfn, &pgmap);
+
+		flags = stable_page_flags(ppage);
+		if (pgmap)
+			put_dev_pagemap(pgmap);
 
-		if (put_user(stable_page_flags(ppage), out)) {
+		if (put_user(flags, out)) {
 			ret = -EFAULT;
 			break;
 		}
@@ -277,6 +284,7 @@  static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
 {
 	const unsigned long max_dump_pfn = get_max_dump_pfn();
 	u64 __user *out = (u64 __user *)buf;
+	struct dev_pagemap *pgmap = NULL;
 	struct page *ppage;
 	unsigned long src = *ppos;
 	unsigned long pfn;
@@ -291,17 +299,18 @@  static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
 	count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);
 
 	while (count > 0) {
-		/*
-		 * TODO: ZONE_DEVICE support requires to identify
-		 * memmaps that were actually initialized.
-		 */
 		ppage = pfn_to_online_page(pfn);
+		if (!ppage)
+			ppage = pfn_to_devmap_page(pfn, &pgmap);
 
 		if (ppage)
 			ino = page_cgroup_ino(ppage);
 		else
 			ino = 0;
 
+		if (pgmap)
+			put_dev_pagemap(pgmap);
+
 		if (put_user(ino, out)) {
 			ret = -EFAULT;
 			break;