@@ -665,6 +665,9 @@ enum zone_watermarks {
#define NR_LOWORDER_PCP_LISTS (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1))
#define HIGHORDER_PCP_LIST_INDEX (NR_LOWORDER_PCP_LISTS - (PAGE_ALLOC_COSTLY_ORDER + 1))
#define NR_PCP_LISTS (NR_LOWORDER_PCP_LISTS + NR_PCP_THP)
+#ifdef CONFIG_PCP_ORDER_STATS
+#define NR_PCP_ORDER (PAGE_ALLOC_COSTLY_ORDER + NR_PCP_THP + 1)
+#endif
#define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost)
#define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost)
@@ -702,6 +705,9 @@ struct per_cpu_pages {
/* Lists of pages, one per migrate type stored on the pcp-lists */
struct list_head lists[NR_PCP_LISTS];
+#ifdef CONFIG_PCP_ORDER_STATS
+ int per_order_count[NR_PCP_ORDER]; /* number of pages of each order on the lists */
+#endif
} ____cacheline_aligned_in_smp;
struct per_cpu_zonestat {
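
A quick standalone sketch, not part of the patch, of how the NR_PCP_ORDER
arithmetic lines up with the order0..order9 output in the commit message;
PAGE_ALLOC_COSTLY_ORDER = 3 and NR_PCP_THP = 6 (orders 4..9 kept on the
pcp lists) are assumptions about this tree:

    /* sketch only; the two constants below are assumed, not taken
     * from the patch */
    #include <stdio.h>

    #define PAGE_ALLOC_COSTLY_ORDER 3   /* assumed mainline value */
    #define NR_PCP_THP              6   /* assumed: orders 4..9 on pcp */
    #define NR_PCP_ORDER (PAGE_ALLOC_COSTLY_ORDER + NR_PCP_THP + 1)

    int main(void)
    {
            /* one per_order_count[] slot per order that can sit on a
             * pcp list */
            printf("NR_PCP_ORDER = %d (order0..order%d)\n",
                   NR_PCP_ORDER, NR_PCP_ORDER - 1); /* 10, order0..order9 */
            return 0;
    }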
@@ -624,4 +624,23 @@ static inline void lruvec_stat_sub_folio(struct folio *folio,
{
lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio));
}
+
+static inline void pcp_order_stat_mod(struct per_cpu_pages *pcp, int order,
+ int val)
+{
+#ifdef CONFIG_PCP_ORDER_STATS
+ pcp->per_order_count[order] += val;
+#endif
+}
+
+static inline void pcp_order_stat_inc(struct per_cpu_pages *pcp, int order)
+{
+ pcp_order_stat_mod(pcp, order, 1);
+}
+
+static inline void pcp_order_stat_dec(struct per_cpu_pages *pcp, int order)
+{
+ pcp_order_stat_mod(pcp, order, -1);
+}
+
#endif /* _LINUX_VMSTAT_H */
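
The helpers above follow the usual compile-away stat pattern: call sites
stay unconditional while the body disappears without the config option. A
minimal userspace mock of the same pattern, not part of the patch, with
purely illustrative names:

    #include <stdio.h>

    #define CONFIG_PCP_ORDER_STATS      /* comment out for the no-op case */
    #define NR_PCP_ORDER 10

    struct pcp_mock {
            int per_order_count[NR_PCP_ORDER];
    };

    static void order_stat_mod(struct pcp_mock *pcp, int order, int val)
    {
    #ifdef CONFIG_PCP_ORDER_STATS
            pcp->per_order_count[order] += val; /* compiled only when enabled */
    #else
            (void)pcp; (void)order; (void)val;  /* body compiles away */
    #endif
    }

    int main(void)
    {
            struct pcp_mock pcp = { { 0 } };

            order_stat_mod(&pcp, 4, 3);  /* three order-4 pages freed to pcp */
            order_stat_mod(&pcp, 4, -1); /* one allocated back out */
            printf("order4: %d\n", pcp.per_order_count[4]); /* prints 2 */
            return 0;
    }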
@@ -276,3 +276,11 @@ config PER_VMA_LOCK_STATS
overhead in the page fault path.
If in doubt, say N.
+
+config PCP_ORDER_STATS
+	bool "Per-order statistics for the PCP (per-CPU pageset)"
+	help
+	  Say Y to show per-order page counts of the per-CPU pagesets in
+	  /proc/zoneinfo and via pcp_order_stat in sysfs.
+
+	  If in doubt, say N.
@@ -599,12 +599,39 @@ DEFINE_MTHP_STAT_ATTR(anon_swpout, MTHP_STAT_ANON_SWPOUT);
DEFINE_MTHP_STAT_ATTR(anon_swpout_fallback, MTHP_STAT_ANON_SWPOUT_FALLBACK);
DEFINE_MTHP_STAT_ATTR(anon_swpin_refault, MTHP_STAT_ANON_SWPIN_REFAULT);
+#ifdef CONFIG_PCP_ORDER_STATS
+static ssize_t pcp_order_stat_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ int order = to_thpsize(kobj)->order;
+ unsigned int count = 0;
+ struct zone *zone;
+
+ for_each_populated_zone(zone) {
+ struct per_cpu_pages *pcp;
+ int i;
+
+ for_each_online_cpu(i) {
+ pcp = per_cpu_ptr(zone->per_cpu_pageset, i);
+ count += pcp->per_order_count[order];
+ }
+ }
+
+ return sysfs_emit(buf, "%u\n", count);
+}
+
+static struct kobj_attribute pcp_order_stat_attr = __ATTR_RO(pcp_order_stat);
+#endif
+
static struct attribute *stats_attrs[] = {
&anon_alloc_attr.attr,
&anon_alloc_fallback_attr.attr,
&anon_swpout_attr.attr,
&anon_swpout_fallback_attr.attr,
&anon_swpin_refault_attr.attr,
+#ifdef CONFIG_PCP_ORDER_STATS
+ &pcp_order_stat_attr.attr,
+#endif
NULL,
};
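
A minimal userspace reader for the new attribute, not part of the patch;
the path comes from the example in the commit message and only exists with
CONFIG_PCP_ORDER_STATS=y on a kernel with the mTHP sysfs stats:

    #include <stdio.h>

    int main(void)
    {
            const char *path = "/sys/kernel/mm/transparent_hugepage/"
                               "hugepages-64kB/stats/pcp_order_stat";
            FILE *f = fopen(path, "r");
            unsigned int count;

            if (!f) {
                    perror(path);
                    return 1;
            }
            if (fscanf(f, "%u", &count) == 1)
                    printf("order-4 pages on all pcp lists: %u\n", count);
            fclose(f);
            return 0;
    }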
@@ -1185,6 +1185,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
list_del(&page->pcp_list);
count -= nr_pages;
pcp->count -= nr_pages;
+ pcp_order_stat_dec(pcp, order);
__free_one_page(page, pfn, zone, order, mt, FPI_NONE);
trace_mm_page_pcpu_drain(page, order, mt);
@@ -2560,6 +2561,7 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp,
pindex = order_to_pindex(migratetype, order);
list_add(&page->pcp_list, &pcp->lists[pindex]);
pcp->count += 1 << order;
+ pcp_order_stat_inc(pcp, order);
batch = READ_ONCE(pcp->batch);
/*
@@ -2957,6 +2959,7 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
migratetype, alloc_flags);
pcp->count += alloced << order;
+ pcp_order_stat_mod(pcp, order, alloced);
if (unlikely(list_empty(list)))
return NULL;
}
@@ -2964,6 +2967,7 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
page = list_first_entry(list, struct page, pcp_list);
list_del(&page->pcp_list);
pcp->count -= 1 << order;
+ pcp_order_stat_dec(pcp, order);
} while (check_new_pages(page, order));
return page;
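
The four hooks above preserve a simple per-pageset invariant: whenever
pcp->count changes by n << order, per_order_count[order] changes by n, so
pcp->count should equal the sum of per_order_count[order] << order. A
standalone check, not part of the patch, against the cpu 15 sample in the
commit message:

    #include <stdio.h>

    #define NR_PCP_ORDER 10

    int main(void)
    {
            /* the cpu 15 sample: 59/28/28/6 pages of order 0/1/2/3 */
            int per_order_count[NR_PCP_ORDER] = { 59, 28, 28, 6 };
            unsigned int count = 0;
            int order;

            for (order = 0; order < NR_PCP_ORDER; order++)
                    count += per_order_count[order] << order;

            printf("expected pcp->count: %u\n", count); /* prints 275 */
            return 0;
    }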
@@ -1674,6 +1674,19 @@ static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
return false;
}
+static void zoneinfo_show_pcp_order_stat(struct seq_file *m,
+ struct per_cpu_pages *pcp)
+{
+#ifdef CONFIG_PCP_ORDER_STATS
+ int j;
+
+ for (j = 0; j < NR_PCP_ORDER; j++)
+ seq_printf(m,
+ "\n order%d: %d",
+ j, pcp->per_order_count[j]);
+#endif
+}
+
static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
struct zone *zone)
{
@@ -1748,6 +1761,9 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
pcp->count,
pcp->high,
pcp->batch);
+
+ zoneinfo_show_pcp_order_stat(m, pcp);
+
#ifdef CONFIG_SMP
pzstats = per_cpu_ptr(zone->per_cpu_zonestats, i);
seq_printf(m, "\n vm stats threshold: %d",

THIS IS ONLY FOR DEBUG. Show per-order page counts for each CPU's pageset
in /proc/zoneinfo, and add a new pcp_order_stat file in sysfs that shows
the total number of PCP pages held for each hugepage size.

# cat /proc/zoneinfo
....
  cpu: 15
              count: 275
              high:  529
              batch: 63
 order0: 59
 order1: 28
 order2: 28
 order3: 6
 order4: 0
 order5: 0
 order6: 0
 order7: 0
 order8: 0
 order9: 0

# cat /sys/kernel/mm/transparent_hugepage/hugepages-64kB/stats/pcp_order_stat
10

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 include/linux/mmzone.h |  6 ++++++
 include/linux/vmstat.h | 19 +++++++++++++++++++
 mm/Kconfig.debug       |  8 ++++++++
 mm/huge_memory.c       | 27 +++++++++++++++++++++++++++
 mm/page_alloc.c        |  4 ++++
 mm/vmstat.c            | 16 ++++++++++++++++
 6 files changed, 80 insertions(+)
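
For completeness, a standalone sketch, not part of the patch, that totals
pcp_order_stat across all mTHP sizes; it assumes only the sysfs layout
shown above and glob(3):

    #include <glob.h>
    #include <stdio.h>

    int main(void)
    {
            glob_t g;
            size_t i;
            unsigned int total = 0, v;

            if (glob("/sys/kernel/mm/transparent_hugepage/"
                     "hugepages-*/stats/pcp_order_stat", 0, NULL, &g))
                    return 1;
            for (i = 0; i < g.gl_pathc; i++) {
                    FILE *f = fopen(g.gl_pathv[i], "r");

                    if (f && fscanf(f, "%u", &v) == 1) {
                            printf("%s: %u\n", g.gl_pathv[i], v);
                            total += v;
                    }
                    if (f)
                            fclose(f);
            }
            printf("total high-order pages on pcp lists: %u\n", total);
            globfree(&g);
            return 0;
    }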