Message ID | 20240115093437.87814-3-vernhao@tencent.com
---|---
State | New
Series | mm, pcp: add high order page info in /proc/zoneinfo
On Mon, 15 Jan 2024, Vern Hao wrote:

> From: Xin Hao <vernhao@tencent.com>
>
> With this patch, we can see the distribution of pages of different orders on
> each cpu, just like below.
> #cat /proc/zoneinfo
> ....
> cpu: 2
> total_count: 14286

I don't think we should be changing the naming of the field if there are
existing users that parse /proc/zoneinfo.

> order0 : 1260
> order1 : 13
> order2 : 42
> order3 : 4
> order4 : 0
> order5 : 0
> order6 : 0
> order7 : 0
> order8 : 0
> order9 : 25
> order10: 0
> order11: 0
> order12: 0
> high: 14541
> batch: 63
>
> Signed-off-by: Xin Hao <vernhao@tencent.com>
> ---
>  include/linux/mmzone.h |  1 +
>  mm/page_alloc.c        |  4 ++++
>  mm/vmstat.c            | 18 ++++++++++++------
>  3 files changed, 17 insertions(+), 6 deletions(-)
>
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 883168776fea..55d25b4f51e5 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -684,6 +684,7 @@ enum zone_watermarks {
>  struct per_cpu_pages {
>  	spinlock_t lock;	/* Protects lists field */
>  	int total_count;	/* total number of pages in the list */
> +	int count[NR_PCP_LISTS]; /* per-order page counts */
>  	int high;		/* high watermark, emptying needed */
>  	int high_min;		/* min high watermark */
>  	int high_max;		/* max high watermark */
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 4e91e429b8d1..7ec2dc5c5ea5 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1228,6 +1228,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
>  		list_del(&page->pcp_list);
>  		count -= nr_pages;
>  		pcp->total_count -= nr_pages;
> +		pcp->count[order] -= 1;
>
>  		/* MIGRATE_ISOLATE page should not go to pcplists */
>  		VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
> @@ -2478,6 +2479,7 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp,
>  	pindex = order_to_pindex(migratetype, order);
>  	list_add(&page->pcp_list, &pcp->lists[pindex]);
>  	pcp->total_count += 1 << order;
> +	pcp->count[order] += 1;
>
>  	batch = READ_ONCE(pcp->batch);
>  	/*
> @@ -2858,6 +2860,7 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
>  					migratetype, alloc_flags);
>
>  			pcp->total_count += alloced << order;
> +			pcp->count[order] += alloced;
>  			if (unlikely(list_empty(list)))
>  				return NULL;
>  		}
> @@ -2865,6 +2868,7 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
>  		page = list_first_entry(list, struct page, pcp_list);
>  		list_del(&page->pcp_list);
>  		pcp->total_count -= 1 << order;
> +		pcp->count[order] -= 1;
>  	} while (check_new_pages(page, order));
>
>  	return page;
> diff --git a/mm/vmstat.c b/mm/vmstat.c
> index c1e8096ff0a6..e04300ec450f 100644
> --- a/mm/vmstat.c
> +++ b/mm/vmstat.c
> @@ -1735,19 +1735,25 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
>
>  	seq_printf(m, "\n  pagesets");
>  	for_each_online_cpu(i) {
> +		int j;
>  		struct per_cpu_pages *pcp;
>  		struct per_cpu_zonestat __maybe_unused *pzstats;
>
>  		pcp = per_cpu_ptr(zone->per_cpu_pageset, i);
>  		seq_printf(m,
>  			   "\n    cpu: %i"
> -			   "\n              count: %i"
> -			   "\n              high:  %i"
> -			   "\n              batch: %i",
> +			   "\n              total_count: %i",
>  			   i,
> -			   pcp->total_count,
> -			   pcp->high,
> -			   pcp->batch);
> +			   pcp->total_count);
> +		for (j = 0; j < NR_PCP_LISTS; j++)
> +			seq_printf(m,
> +				   "\n              order%-2i: %-3i",
> +				   j, pcp->count[j]);
> +		seq_printf(m,
> +			   "\n              high:  %i"
> +			   "\n              batch: %i",
> +			   pcp->high,
> +			   pcp->batch);
>  #ifdef CONFIG_SMP
>  		pzstats = per_cpu_ptr(zone->per_cpu_zonestats, i);
>  		seq_printf(m, "\n  vm stats threshold: %d",
> --
> 2.31.1
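To make the parsing concern above concrete, here is a minimal userspace
sketch of the kind of consumer the objection is about. This is an editor's
illustration, not code from this thread: it keys on the literal "count:"
lines under each "cpu:" block of /proc/zoneinfo, so renaming the key to
"total_count:" would make it silently match nothing rather than fail loudly.

    /* Illustrative only: a parser that depends on the current
     * "count:" key under each per-cpu pageset block. */
    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/proc/zoneinfo", "r");
            char line[256];
            int cpu = -1, count;

            if (!f)
                    return 1;
            while (fgets(line, sizeof(line), f)) {
                    /* Remember which per-cpu block we are in. */
                    if (sscanf(line, " cpu: %d", &cpu) == 1)
                            continue;
                    /* Matches "count:" exactly; after the proposed rename
                     * to "total_count:" this never matches again. */
                    if (cpu >= 0 && sscanf(line, " count: %d", &count) == 1)
                            printf("cpu %d: pcp count %d\n", cpu, count);
            }
            fclose(f);
            return 0;
    }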
On Mon, Jan 15, 2024 at 05:34:36PM +0800, Vern Hao wrote:
> From: Xin Hao <vernhao@tencent.com>
>
> With this patch, we can see the distribution of pages of different orders on
> each cpu, just like below.
> #cat /proc/zoneinfo
> ....
> cpu: 2
> total_count: 14286
> order0 : 1260
> order1 : 13
> order2 : 42
> order3 : 4
> order4 : 0
> order5 : 0
> order6 : 0
> order7 : 0
> order8 : 0
> order9 : 25
> order10: 0
> order11: 0
> order12: 0
> high: 14541
> batch: 63
>
> Signed-off-by: Xin Hao <vernhao@tencent.com>

I am not a major fan, because increasing the size of a per-cpu structure for
debugging purposes incurs a cost for everyone while only a tiny minority may
care. There is also a mild risk of breaking existing parsers of that file,
although maybe that's not a big deal.

However, the same information could be extracted by locking the pcp
structures and counting the items on each list. That would increase the cost
of reading zoneinfo, but it's unlikely the file is read at high frequency. If
that were a concern, a separate proc file could be used.

Finally, the same information can likely be extracted via a systemtap script,
a BPF script (if it can get at the right symbols and locking; I didn't check)
or a kernel probe. Even with this information, it's not clear what meaningful
action a user could take, so this is really a developer-only feature with a
cost incurred by everybody.
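The count-at-read-time alternative suggested above could look roughly like
the sketch below. This is an editor's illustration, not code from the
thread: the helper name is invented, it assumes it is placed somewhere
pindex_to_order() is visible (currently a static helper in mm/page_alloc.c,
so it would need to be shared), and it uses NR_PCP_LISTS as a comfortable
upper bound on the orders present on the lists.

    /* Illustrative only: derive per-order counts when /proc/zoneinfo is
     * read, instead of maintaining new per-cpu fields on the hot paths. */
    static void zoneinfo_show_pcp_orders(struct seq_file *m,
                                         struct per_cpu_pages *pcp)
    {
            /* Indexed by order; NR_PCP_LISTS exceeds the largest order
             * cached on pcp lists on common configs. */
            int nr[NR_PCP_LISTS] = { 0 };
            struct page *page;
            int pindex;

            /* Count under pcp->lock so the lists cannot change beneath
             * us; the walk is O(pages cached on this cpu), acceptable
             * for a file that is not read at high frequency. */
            spin_lock(&pcp->lock);
            for (pindex = 0; pindex < NR_PCP_LISTS; pindex++)
                    list_for_each_entry(page, &pcp->lists[pindex], pcp_list)
                            nr[pindex_to_order(pindex)]++;
            spin_unlock(&pcp->lock);

            /* Mirror the output format proposed in the patch. */
            for (pindex = 0; pindex < NR_PCP_LISTS; pindex++)
                    seq_printf(m, "\n              order%-2i: %-3i",
                               pindex, nr[pindex]);
    }

The trade-off is the one described in the review: the allocation and free
fast paths stay untouched, and the cost of the list walk is paid only by
readers of /proc/zoneinfo.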
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 883168776fea..55d25b4f51e5 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -684,6 +684,7 @@ enum zone_watermarks {
 struct per_cpu_pages {
 	spinlock_t lock;	/* Protects lists field */
 	int total_count;	/* total number of pages in the list */
+	int count[NR_PCP_LISTS]; /* per-order page counts */
 	int high;		/* high watermark, emptying needed */
 	int high_min;		/* min high watermark */
 	int high_max;		/* max high watermark */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4e91e429b8d1..7ec2dc5c5ea5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1228,6 +1228,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 		list_del(&page->pcp_list);
 		count -= nr_pages;
 		pcp->total_count -= nr_pages;
+		pcp->count[order] -= 1;

 		/* MIGRATE_ISOLATE page should not go to pcplists */
 		VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
@@ -2478,6 +2479,7 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp,
 	pindex = order_to_pindex(migratetype, order);
 	list_add(&page->pcp_list, &pcp->lists[pindex]);
 	pcp->total_count += 1 << order;
+	pcp->count[order] += 1;

 	batch = READ_ONCE(pcp->batch);
 	/*
@@ -2858,6 +2860,7 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
 					migratetype, alloc_flags);

 			pcp->total_count += alloced << order;
+			pcp->count[order] += alloced;
 			if (unlikely(list_empty(list)))
 				return NULL;
 		}
@@ -2865,6 +2868,7 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
 		page = list_first_entry(list, struct page, pcp_list);
 		list_del(&page->pcp_list);
 		pcp->total_count -= 1 << order;
+		pcp->count[order] -= 1;
 	} while (check_new_pages(page, order));

 	return page;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index c1e8096ff0a6..e04300ec450f 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1735,19 +1735,25 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,

 	seq_printf(m, "\n  pagesets");
 	for_each_online_cpu(i) {
+		int j;
 		struct per_cpu_pages *pcp;
 		struct per_cpu_zonestat __maybe_unused *pzstats;

 		pcp = per_cpu_ptr(zone->per_cpu_pageset, i);
 		seq_printf(m,
 			   "\n    cpu: %i"
-			   "\n              count: %i"
-			   "\n              high:  %i"
-			   "\n              batch: %i",
+			   "\n              total_count: %i",
 			   i,
-			   pcp->total_count,
-			   pcp->high,
-			   pcp->batch);
+			   pcp->total_count);
+		for (j = 0; j < NR_PCP_LISTS; j++)
+			seq_printf(m,
+				   "\n              order%-2i: %-3i",
+				   j, pcp->count[j]);
+		seq_printf(m,
+			   "\n              high:  %i"
+			   "\n              batch: %i",
+			   pcp->high,
+			   pcp->batch);
 #ifdef CONFIG_SMP
 		pzstats = per_cpu_ptr(zone->per_cpu_zonestats, i);
 		seq_printf(m, "\n  vm stats threshold: %d",