diff mbox series

[RFC,v1,2/2] mm, pcp: add more detail info about high order page count

Message ID 20240115093437.87814-3-vernhao@tencent.com (mailing list archive)
State New
Headers show
Series mm, pcp: add high order page info in /proc/zoneinfo | expand

Commit Message

Vern Hao Jan. 15, 2024, 9:34 a.m. UTC
From: Xin Hao <vernhao@tencent.com>

With this patch, we can see the distribution of pages of different orders on
each CPU, as shown below.
	#cat /proc/zoneinfo
    ....
    cpu: 2
              total_count: 14286
                  order0 : 1260
                  order1 : 13
                  order2 : 42
                  order3 : 4
                  order4 : 0
                  order5 : 0
                  order6 : 0
                  order7 : 0
                  order8 : 0
                  order9 : 25
                  order10: 0
                  order11: 0
                  order12: 0
              high:  14541
              batch: 63

Signed-off-by: Xin Hao <vernhao@tencent.com>
---
 include/linux/mmzone.h |  1 +
 mm/page_alloc.c        |  4 ++++
 mm/vmstat.c            | 18 ++++++++++++------
 3 files changed, 17 insertions(+), 6 deletions(-)

Comments

David Rientjes Jan. 15, 2024, 8:34 p.m. UTC | #1
On Mon, 15 Jan 2024, Vern Hao wrote:

> From: Xin Hao <vernhao@tencent.com>
> 
> With this patch, we can see the distribution of pages of different orders on
> each cpu, just like below.
> 	#cat /proc/zoneinfo
>     ....
>     cpu: 2
>               total_count: 14286

I don't think we should be changing the naming of the field if there are 
existing users that parse /proc/zoneinfo.

>                   order0 : 1260
>                   order1 : 13
>                   order2 : 42
>                   order3 : 4
>                   order4 : 0
>                   order5 : 0
>                   order6 : 0
>                   order7 : 0
>                   order8 : 0
>                   order9 : 25
>                   order10: 0
>                   order11: 0
>                   order12: 0
>               high:  14541
>               batch: 63
> 
> Signed-off-by: Xin Hao <vernhao@tencent.com>
> ---
>  include/linux/mmzone.h |  1 +
>  mm/page_alloc.c        |  4 ++++
>  mm/vmstat.c            | 18 ++++++++++++------
>  3 files changed, 17 insertions(+), 6 deletions(-)
> 
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 883168776fea..55d25b4f51e5 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -684,6 +684,7 @@ enum zone_watermarks {
>  struct per_cpu_pages {
>  	spinlock_t lock;	/* Protects lists field */
>  	int total_count;	/* total number of pages in the list */
> +	int count[NR_PCP_LISTS]; /* per-order page counts */
>  	int high;		/* high watermark, emptying needed */
>  	int high_min;		/* min high watermark */
>  	int high_max;		/* max high watermark */
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 4e91e429b8d1..7ec2dc5c5ea5 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1228,6 +1228,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
>  			list_del(&page->pcp_list);
>  			count -= nr_pages;
>  			pcp->total_count -= nr_pages;
> +			pcp->count[order] -= 1;
>  
>  			/* MIGRATE_ISOLATE page should not go to pcplists */
>  			VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
> @@ -2478,6 +2479,7 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp,
>  	pindex = order_to_pindex(migratetype, order);
>  	list_add(&page->pcp_list, &pcp->lists[pindex]);
>  	pcp->total_count += 1 << order;
> +	pcp->count[order] += 1;
>  
>  	batch = READ_ONCE(pcp->batch);
>  	/*
> @@ -2858,6 +2860,7 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
>  					migratetype, alloc_flags);
>  
>  			pcp->total_count += alloced << order;
> +			pcp->count[order] += alloced;
>  			if (unlikely(list_empty(list)))
>  				return NULL;
>  		}
> @@ -2865,6 +2868,7 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
>  		page = list_first_entry(list, struct page, pcp_list);
>  		list_del(&page->pcp_list);
>  		pcp->total_count -= 1 << order;
> +		pcp->count[order] -= 1;
>  	} while (check_new_pages(page, order));
>  
>  	return page;
> diff --git a/mm/vmstat.c b/mm/vmstat.c
> index c1e8096ff0a6..e04300ec450f 100644
> --- a/mm/vmstat.c
> +++ b/mm/vmstat.c
> @@ -1735,19 +1735,25 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
>  
>  	seq_printf(m, "\n  pagesets");
>  	for_each_online_cpu(i) {
> +		int j;
>  		struct per_cpu_pages *pcp;
>  		struct per_cpu_zonestat __maybe_unused *pzstats;
>  
>  		pcp = per_cpu_ptr(zone->per_cpu_pageset, i);
>  		seq_printf(m,
>  			   "\n    cpu: %i"
> -			   "\n              count: %i"
> -			   "\n              high:  %i"
> -			   "\n              batch: %i",
> +			   "\n              total_count: %i",
>  			   i,
> -			   pcp->total_count,
> -			   pcp->high,
> -			   pcp->batch);
> +			   pcp->total_count);
> +		for (j = 0; j < NR_PCP_LISTS; j++)
> +			seq_printf(m,
> +				   "\n                  order%-2i: %-3i",
> +				   j, pcp->count[j]);
> +		seq_printf(m,
> +                          "\n              high:  %i"
> +                          "\n              batch: %i",
> +                          pcp->high,
> +                          pcp->batch);
>  #ifdef CONFIG_SMP
>  		pzstats = per_cpu_ptr(zone->per_cpu_zonestats, i);
>  		seq_printf(m, "\n  vm stats threshold: %d",
> -- 
> 2.31.1
> 
> 
>
Mel Gorman Jan. 16, 2024, 2:31 p.m. UTC | #2
On Mon, Jan 15, 2024 at 05:34:36PM +0800, Vern Hao wrote:
> From: Xin Hao <vernhao@tencent.com>
> 
> With this patch, we can see the distribution of pages of different orders on
> each cpu, just like below.
> 	#cat /proc/zoneinfo
>     ....
>     cpu: 2
>               total_count: 14286
>                   order0 : 1260
>                   order1 : 13
>                   order2 : 42
>                   order3 : 4
>                   order4 : 0
>                   order5 : 0
>                   order6 : 0
>                   order7 : 0
>                   order8 : 0
>                   order9 : 25
>                   order10: 0
>                   order11: 0
>                   order12: 0
>               high:  14541
>               batch: 63
> 
> Signed-off-by: Xin Hao <vernhao@tencent.com>

I am not a major fan because increasing the size of a per-cpu structure for
debugging purposes incurs a cost for everyone while only a tiny minority
may care. There is a mild risk it would break existing parsers of that file,
although maybe that's not a big deal. However, the same information could be
extracted by locking the pcp structures and counting the items per list. It
would increase the cost of reading zoneinfo but it's unlikely the file is
read at high frequency. If that was a concern, a separate proc file could be
used. Finally, the same information likely can be extracted via a systemtap
script, a BPF script (if it can get to the right symbols and locking, I
didn't check) or via a kernel probe. Even with that information, it's not
clear what meaningful action a user can take, so this is a developer-only
feature really with a cost incurred for everybody.
diff mbox series

Patch

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 883168776fea..55d25b4f51e5 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -684,6 +684,7 @@  enum zone_watermarks {
 struct per_cpu_pages {
 	spinlock_t lock;	/* Protects lists field */
 	int total_count;	/* total number of pages in the list */
+	int count[NR_PCP_LISTS]; /* per-order page counts */
 	int high;		/* high watermark, emptying needed */
 	int high_min;		/* min high watermark */
 	int high_max;		/* max high watermark */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4e91e429b8d1..7ec2dc5c5ea5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1228,6 +1228,7 @@  static void free_pcppages_bulk(struct zone *zone, int count,
 			list_del(&page->pcp_list);
 			count -= nr_pages;
 			pcp->total_count -= nr_pages;
+			pcp->count[order] -= 1;
 
 			/* MIGRATE_ISOLATE page should not go to pcplists */
 			VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
@@ -2478,6 +2479,7 @@  static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp,
 	pindex = order_to_pindex(migratetype, order);
 	list_add(&page->pcp_list, &pcp->lists[pindex]);
 	pcp->total_count += 1 << order;
+	pcp->count[order] += 1;
 
 	batch = READ_ONCE(pcp->batch);
 	/*
@@ -2858,6 +2860,7 @@  struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
 					migratetype, alloc_flags);
 
 			pcp->total_count += alloced << order;
+			pcp->count[order] += alloced;
 			if (unlikely(list_empty(list)))
 				return NULL;
 		}
@@ -2865,6 +2868,7 @@  struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
 		page = list_first_entry(list, struct page, pcp_list);
 		list_del(&page->pcp_list);
 		pcp->total_count -= 1 << order;
+		pcp->count[order] -= 1;
 	} while (check_new_pages(page, order));
 
 	return page;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index c1e8096ff0a6..e04300ec450f 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1735,19 +1735,25 @@  static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 
 	seq_printf(m, "\n  pagesets");
 	for_each_online_cpu(i) {
+		int j;
 		struct per_cpu_pages *pcp;
 		struct per_cpu_zonestat __maybe_unused *pzstats;
 
 		pcp = per_cpu_ptr(zone->per_cpu_pageset, i);
 		seq_printf(m,
 			   "\n    cpu: %i"
-			   "\n              count: %i"
-			   "\n              high:  %i"
-			   "\n              batch: %i",
+			   "\n              total_count: %i",
 			   i,
-			   pcp->total_count,
-			   pcp->high,
-			   pcp->batch);
+			   pcp->total_count);
+		for (j = 0; j < NR_PCP_LISTS; j++)
+			seq_printf(m,
+				   "\n                  order%-2i: %-3i",
+				   j, pcp->count[j]);
+		seq_printf(m,
+                          "\n              high:  %i"
+                          "\n              batch: %i",
+                          pcp->high,
+                          pcp->batch);
 #ifdef CONFIG_SMP
 		pzstats = per_cpu_ptr(zone->per_cpu_zonestats, i);
 		seq_printf(m, "\n  vm stats threshold: %d",