Message ID | 1638619795-71451-1-git-send-email-wang.yong12@zte.com.cn (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | [v2,linux-next] delayacct: track delays from memory compact | expand |
On Sat, Dec 04, 2021 at 04:09:55AM -0800, yongw.pur@gmail.com wrote: > From: wangyong <wang.yong12@zte.com.cn> > > Delay accounting does not track the delay of memory compact. > When there is not enough free memory, tasks can spend > a amount of their time waiting for compact. > > To get the impact of tasks in direct memory compact, measure > the delay when allocating memory through memory compact. > Should we call this DIRECT_COMPACT and through documentation or name change imply that this won't work for kcompactd the kernel thread - based on my reading of the patches. > Also update tools/accounting/getdelays.c: > / # ./getdelays_next -di -p 304 > print delayacct stats ON > printing IO accounting > PID 304 > > CPU count real total virtual total delay total delay average > 277 780000000 849039485 18877296 0.068ms > IO count delay total delay average > 0 0 0ms > SWAP count delay total delay average > 0 0 0ms > RECLAIM count delay total delay average > 5 11088812685 2217ms > THRASHING count delay total delay average > 0 0 0ms > COMPACT count delay total delay average > 3 72758 0ms > watch: read=0, write=0, cancelled_write=0 > > Reported-by: Zeal Robot <zealci@zte.com.cn> > Reported-by: kernel test robot <lkp@intel.com> > Signed-off-by: wangyong <wang.yong12@zte.com.cn> > Reviewed-by: Andrew Morton <akpm@linux-foundation.org> > Reviewed-by: Jiang Xuexin <jiang.xuexin@zte.com.cn> > Reviewed-by: Zhang Wenya <zhang.wenya1@zte.com.cn> > Reviewed-by: Yang Yang <yang.yang29@zte.com.cn> > --- > > Changes since v1: > -fixed duplicate member freepages_start. > > include/linux/delayacct.h | 28 ++++++++++++++++++++++++++++ > include/uapi/linux/taskstats.h | 6 +++++- > kernel/delayacct.c | 15 +++++++++++++++ > mm/page_alloc.c | 3 +++ > tools/accounting/getdelays.c | 8 +++++++- > 5 files changed, 58 insertions(+), 2 deletions(-) > > diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h > index 435c365..3e03d01 100644 > --- a/include/linux/delayacct.h > +++ b/include/linux/delayacct.h > @@ -42,8 +42,12 @@ struct task_delay_info { > u64 thrashing_start; > u64 thrashing_delay; /* wait for thrashing page */ > > + u64 compact_start; > + u64 compact_delay; /* wait for memory compact */ > + > u32 freepages_count; /* total count of memory reclaim */ > u32 thrashing_count; /* total count of thrash waits */ > + u32 compact_count; /* total count of memory compact */ > }; > #endif > > @@ -72,6 +76,8 @@ extern void __delayacct_thrashing_start(void); > extern void __delayacct_thrashing_end(void); > extern void __delayacct_swapin_start(void); > extern void __delayacct_swapin_end(void); > +extern void __delayacct_compact_start(void); > +extern void __delayacct_compact_end(void); > > static inline void delayacct_tsk_init(struct task_struct *tsk) > { > @@ -170,6 +176,24 @@ static inline void delayacct_swapin_end(void) > __delayacct_swapin_end(); > } > > +static inline void delayacct_compact_start(void) > +{ > + if (!static_branch_unlikely(&delayacct_key)) > + return; > + > + if (current->delays) > + __delayacct_compact_start(); > +} > + > +static inline void delayacct_compact_end(void) > +{ > + if (!static_branch_unlikely(&delayacct_key)) > + return; > + > + if (current->delays) > + __delayacct_compact_end(); > +} > + > #else > static inline void delayacct_init(void) > {} > @@ -200,6 +224,10 @@ static inline void delayacct_swapin_start(void) > {} > static inline void delayacct_swapin_end(void) > {} > +static inline void delayacct_compact_start(void) > +{} > +static inline void delayacct_compact_end(void) > +{} > > #endif /* CONFIG_TASK_DELAY_ACCT */ > > diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h > index ccbd087..12327d3 100644 > --- a/include/uapi/linux/taskstats.h > +++ b/include/uapi/linux/taskstats.h > @@ -34,7 +34,7 @@ > */ > > > -#define TASKSTATS_VERSION 10 > +#define TASKSTATS_VERSION 11 > #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN > * in linux/sched.h */ > > @@ -172,6 +172,10 @@ struct taskstats { > > /* v10: 64-bit btime to avoid overflow */ > __u64 ac_btime64; /* 64-bit begin time */ > + > + /* Delay waiting for memory compact */ > + __u64 compact_count; > + __u64 compact_delay_total; > }; > > > diff --git a/kernel/delayacct.c b/kernel/delayacct.c > index 11f3cd8..c5e8cea 100644 > --- a/kernel/delayacct.c > +++ b/kernel/delayacct.c > @@ -155,10 +155,13 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) > d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp; > tmp = d->thrashing_delay_total + tsk->delays->thrashing_delay; > d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp; > + tmp = d->compact_delay_total + tsk->delays->compact_delay; > + d->compact_delay_total = (tmp < d->compact_delay_total) ? 0 : tmp; > d->blkio_count += tsk->delays->blkio_count; > d->swapin_count += tsk->delays->swapin_count; > d->freepages_count += tsk->delays->freepages_count; > d->thrashing_count += tsk->delays->thrashing_count; > + d->compact_count += tsk->delays->compact_count; > raw_spin_unlock_irqrestore(&tsk->delays->lock, flags); > > return 0; > @@ -214,3 +217,15 @@ void __delayacct_swapin_end(void) > ¤t->delays->swapin_count); > } > > +void __delayacct_compact_start(void) > +{ > + current->delays->compact_start = local_clock(); > +} > + > +void __delayacct_compact_end(void) > +{ > + delayacct_end(¤t->delays->lock, > + ¤t->delays->compact_start, > + ¤t->delays->compact_delay, > + ¤t->delays->compact_count); > +} > diff --git a/mm/page_alloc.c b/mm/page_alloc.c > index edfd6c8..6430226 100644 > --- a/mm/page_alloc.c > +++ b/mm/page_alloc.c > @@ -74,6 +74,7 @@ > #include <linux/padata.h> > #include <linux/khugepaged.h> > #include <linux/buffer_head.h> > +#include <linux/delayacct.h> > #include <asm/sections.h> > #include <asm/tlbflush.h> > #include <asm/div64.h> > @@ -4363,6 +4364,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, > return NULL; > > psi_memstall_enter(&pflags); > + delayacct_compact_start(); > noreclaim_flag = memalloc_noreclaim_save(); > > *compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac, > @@ -4370,6 +4372,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, > > memalloc_noreclaim_restore(noreclaim_flag); > psi_memstall_leave(&pflags); > + delayacct_compact_end(); > > if (*compact_result == COMPACT_SKIPPED) > return NULL; > diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c > index 5ef1c15..11e8673 100644 > --- a/tools/accounting/getdelays.c > +++ b/tools/accounting/getdelays.c > @@ -205,6 +205,8 @@ static void print_delayacct(struct taskstats *t) > "RECLAIM %12s%15s%15s\n" > " %15llu%15llu%15llums\n" > "THRASHING%12s%15s%15s\n" > + " %15llu%15llu%15llums\n" > + "COMPACT %12s%15s%15s\n" > " %15llu%15llu%15llums\n", > "count", "real total", "virtual total", > "delay total", "delay average", > @@ -228,7 +230,11 @@ static void print_delayacct(struct taskstats *t) > "count", "delay total", "delay average", > (unsigned long long)t->thrashing_count, > (unsigned long long)t->thrashing_delay_total, > - average_ms(t->thrashing_delay_total, t->thrashing_count)); > + average_ms(t->thrashing_delay_total, t->thrashing_count), > + "count", "delay total", "delay average", > + (unsigned long long)t->compact_count, > + (unsigned long long)t->compact_delay_total, > + average_ms(t->compact_delay_total, t->compact_count)); > } > > static void task_context_switch_counts(struct taskstats *t) > -- > 2.7.4 > At some point we should make the user space tool version aware. Reviewed-by: Balbir Singh <bsingharora@gmail.com>
Balbir Singh <bsingharora@gmail.com> 于2021年12月5日周日 16:17写道: > > On Sat, Dec 04, 2021 at 04:09:55AM -0800, yongw.pur@gmail.com wrote: > > From: wangyong <wang.yong12@zte.com.cn> > > > > Delay accounting does not track the delay of memory compact. > > When there is not enough free memory, tasks can spend > > a amount of their time waiting for compact. > > > > To get the impact of tasks in direct memory compact, measure > > the delay when allocating memory through memory compact. > > > > Should we call this DIRECT_COMPACT and through documentation > or name change imply that this won't work for kcompactd the > kernel thread - based on my reading of the patches. > Using DIRECT_COMPACT is a little redundant,because the delayacct stats of delay accounting is specific to tasks, it has nothing to do with kcompactd, which is similar to the RECLAIM field. As for the document, consider submitting a separate patch with the thrashing delay description added. > > Also update tools/accounting/getdelays.c: > > / # ./getdelays_next -di -p 304 > > print delayacct stats ON > > printing IO accounting > > PID 304 > > > > CPU count real total virtual total delay total delay average > > 277 780000000 849039485 18877296 0.068ms > > IO count delay total delay average > > 0 0 0ms > > SWAP count delay total delay average > > 0 0 0ms > > RECLAIM count delay total delay average > > 5 11088812685 2217ms > > THRASHING count delay total delay average > > 0 0 0ms > > COMPACT count delay total delay average > > 3 72758 0ms > > watch: read=0, write=0, cancelled_write=0 > > > > Reported-by: Zeal Robot <zealci@zte.com.cn> > > Reported-by: kernel test robot <lkp@intel.com> > > Signed-off-by: wangyong <wang.yong12@zte.com.cn> > > Reviewed-by: Andrew Morton <akpm@linux-foundation.org> > > Reviewed-by: Jiang Xuexin <jiang.xuexin@zte.com.cn> > > Reviewed-by: Zhang Wenya <zhang.wenya1@zte.com.cn> > > Reviewed-by: Yang Yang <yang.yang29@zte.com.cn> > > --- > > > > Changes since v1: > > -fixed duplicate member freepages_start. > > > > include/linux/delayacct.h | 28 ++++++++++++++++++++++++++++ > > include/uapi/linux/taskstats.h | 6 +++++- > > kernel/delayacct.c | 15 +++++++++++++++ > > mm/page_alloc.c | 3 +++ > > tools/accounting/getdelays.c | 8 +++++++- > > 5 files changed, 58 insertions(+), 2 deletions(-) > > > > diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h > > index 435c365..3e03d01 100644 > > --- a/include/linux/delayacct.h > > +++ b/include/linux/delayacct.h > > @@ -42,8 +42,12 @@ struct task_delay_info { > > u64 thrashing_start; > > u64 thrashing_delay; /* wait for thrashing page */ > > > > + u64 compact_start; > > + u64 compact_delay; /* wait for memory compact */ > > + > > u32 freepages_count; /* total count of memory reclaim */ > > u32 thrashing_count; /* total count of thrash waits */ > > + u32 compact_count; /* total count of memory compact */ > > }; > > #endif > > > > @@ -72,6 +76,8 @@ extern void __delayacct_thrashing_start(void); > > extern void __delayacct_thrashing_end(void); > > extern void __delayacct_swapin_start(void); > > extern void __delayacct_swapin_end(void); > > +extern void __delayacct_compact_start(void); > > +extern void __delayacct_compact_end(void); > > > > static inline void delayacct_tsk_init(struct task_struct *tsk) > > { > > @@ -170,6 +176,24 @@ static inline void delayacct_swapin_end(void) > > __delayacct_swapin_end(); > > } > > > > +static inline void delayacct_compact_start(void) > > +{ > > + if (!static_branch_unlikely(&delayacct_key)) > > + return; > > + > > + if (current->delays) > > + __delayacct_compact_start(); > > +} > > + > > +static inline void delayacct_compact_end(void) > > +{ > > + if (!static_branch_unlikely(&delayacct_key)) > > + return; > > + > > + if (current->delays) > > + __delayacct_compact_end(); > > +} > > + > > #else > > static inline void delayacct_init(void) > > {} > > @@ -200,6 +224,10 @@ static inline void delayacct_swapin_start(void) > > {} > > static inline void delayacct_swapin_end(void) > > {} > > +static inline void delayacct_compact_start(void) > > +{} > > +static inline void delayacct_compact_end(void) > > +{} > > > > #endif /* CONFIG_TASK_DELAY_ACCT */ > > > > diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h > > index ccbd087..12327d3 100644 > > --- a/include/uapi/linux/taskstats.h > > +++ b/include/uapi/linux/taskstats.h > > @@ -34,7 +34,7 @@ > > */ > > > > > > -#define TASKSTATS_VERSION 10 > > +#define TASKSTATS_VERSION 11 > > #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN > > * in linux/sched.h */ > > > > @@ -172,6 +172,10 @@ struct taskstats { > > > > /* v10: 64-bit btime to avoid overflow */ > > __u64 ac_btime64; /* 64-bit begin time */ > > + > > + /* Delay waiting for memory compact */ > > + __u64 compact_count; > > + __u64 compact_delay_total; > > }; > > > > > > diff --git a/kernel/delayacct.c b/kernel/delayacct.c > > index 11f3cd8..c5e8cea 100644 > > --- a/kernel/delayacct.c > > +++ b/kernel/delayacct.c > > @@ -155,10 +155,13 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) > > d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp; > > tmp = d->thrashing_delay_total + tsk->delays->thrashing_delay; > > d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp; > > + tmp = d->compact_delay_total + tsk->delays->compact_delay; > > + d->compact_delay_total = (tmp < d->compact_delay_total) ? 0 : tmp; > > d->blkio_count += tsk->delays->blkio_count; > > d->swapin_count += tsk->delays->swapin_count; > > d->freepages_count += tsk->delays->freepages_count; > > d->thrashing_count += tsk->delays->thrashing_count; > > + d->compact_count += tsk->delays->compact_count; > > raw_spin_unlock_irqrestore(&tsk->delays->lock, flags); > > > > return 0; > > @@ -214,3 +217,15 @@ void __delayacct_swapin_end(void) > > ¤t->delays->swapin_count); > > } > > > > +void __delayacct_compact_start(void) > > +{ > > + current->delays->compact_start = local_clock(); > > +} > > + > > +void __delayacct_compact_end(void) > > +{ > > + delayacct_end(¤t->delays->lock, > > + ¤t->delays->compact_start, > > + ¤t->delays->compact_delay, > > + ¤t->delays->compact_count); > > +} > > diff --git a/mm/page_alloc.c b/mm/page_alloc.c > > index edfd6c8..6430226 100644 > > --- a/mm/page_alloc.c > > +++ b/mm/page_alloc.c > > @@ -74,6 +74,7 @@ > > #include <linux/padata.h> > > #include <linux/khugepaged.h> > > #include <linux/buffer_head.h> > > +#include <linux/delayacct.h> > > #include <asm/sections.h> > > #include <asm/tlbflush.h> > > #include <asm/div64.h> > > @@ -4363,6 +4364,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, > > return NULL; > > > > psi_memstall_enter(&pflags); > > + delayacct_compact_start(); > > noreclaim_flag = memalloc_noreclaim_save(); > > > > *compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac, > > @@ -4370,6 +4372,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, > > > > memalloc_noreclaim_restore(noreclaim_flag); > > psi_memstall_leave(&pflags); > > + delayacct_compact_end(); > > > > if (*compact_result == COMPACT_SKIPPED) > > return NULL; > > diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c > > index 5ef1c15..11e8673 100644 > > --- a/tools/accounting/getdelays.c > > +++ b/tools/accounting/getdelays.c > > @@ -205,6 +205,8 @@ static void print_delayacct(struct taskstats *t) > > "RECLAIM %12s%15s%15s\n" > > " %15llu%15llu%15llums\n" > > "THRASHING%12s%15s%15s\n" > > + " %15llu%15llu%15llums\n" > > + "COMPACT %12s%15s%15s\n" > > " %15llu%15llu%15llums\n", > > "count", "real total", "virtual total", > > "delay total", "delay average", > > @@ -228,7 +230,11 @@ static void print_delayacct(struct taskstats *t) > > "count", "delay total", "delay average", > > (unsigned long long)t->thrashing_count, > > (unsigned long long)t->thrashing_delay_total, > > - average_ms(t->thrashing_delay_total, t->thrashing_count)); > > + average_ms(t->thrashing_delay_total, t->thrashing_count), > > + "count", "delay total", "delay average", > > + (unsigned long long)t->compact_count, > > + (unsigned long long)t->compact_delay_total, > > + average_ms(t->compact_delay_total, t->compact_count)); > > } > > > > static void task_context_switch_counts(struct taskstats *t) > > -- > > 2.7.4 > > > > At some point we should make the user space tool version aware. > I think this tool is generally used with the corresponding kernel, and TASKSTATS_VERSION is used to indicate that the structure has changed which has been modified in this patch. > Reviewed-by: Balbir Singh <bsingharora@gmail.com> Thanks for your reply!
On Sun, Dec 05, 2021 at 07:08:02PM +0800, yong w wrote: > Balbir Singh <bsingharora@gmail.com> 于2021年12月5日周日 16:17写道: > > > > On Sat, Dec 04, 2021 at 04:09:55AM -0800, yongw.pur@gmail.com wrote: > > > From: wangyong <wang.yong12@zte.com.cn> > > > > > > Delay accounting does not track the delay of memory compact. > > > When there is not enough free memory, tasks can spend > > > a amount of their time waiting for compact. > > > > > > To get the impact of tasks in direct memory compact, measure > > > the delay when allocating memory through memory compact. > > > > > > > Should we call this DIRECT_COMPACT and through documentation > > or name change imply that this won't work for kcompactd the > > kernel thread - based on my reading of the patches. > > > Using DIRECT_COMPACT is a little redundant,because the > delayacct stats of delay accounting is specific to tasks, it has > nothing to do with kcompactd, which is similar to the RECLAIM field. > What would we expect when we call delayacct -p <pidof kcompactd> to be output? Yes, I agree with your comment on the reclaim field. Don't feel to strongly, but it can be confusing that kcompactd has spent no time in compact'ing? Not that delayacct is used for kernel threads, but I am not sure if that use case exists today. <snip> Balbir Singh.
Balbir Singh <bsingharora@gmail.com> 于2021年12月7日周二 13:16写道: > > On Sun, Dec 05, 2021 at 07:08:02PM +0800, yong w wrote: > > Balbir Singh <bsingharora@gmail.com> 于2021年12月5日周日 16:17写道: > > > > > > On Sat, Dec 04, 2021 at 04:09:55AM -0800, yongw.pur@gmail.com wrote: > > > > From: wangyong <wang.yong12@zte.com.cn> > > > > > > > > Delay accounting does not track the delay of memory compact. > > > > When there is not enough free memory, tasks can spend > > > > a amount of their time waiting for compact. > > > > > > > > To get the impact of tasks in direct memory compact, measure > > > > the delay when allocating memory through memory compact. > > > > > > > > > > Should we call this DIRECT_COMPACT and through documentation > > > or name change imply that this won't work for kcompactd the > > > kernel thread - based on my reading of the patches. > > > > > Using DIRECT_COMPACT is a little redundant,because the > > delayacct stats of delay accounting is specific to tasks, it has > > nothing to do with kcompactd, which is similar to the RECLAIM field. > > > > What would we expect when we call delayacct -p <pidof kcompactd> > to be output? If the slow path of memory allocation is invoked in the kcompacd process, there may be delays being recorded. > Don't feel to strongly, but it can be confusing that kcompactd > has spent no time in compact'ing? Not that delayacct is used for > kernel threads, but I am not sure if that use case exists today. Yes, delayacct does not restrict the process of obtaining information, but kcompactd is used for compaction, the compact delay of kcompatd is not actually a delay.Maybe it can be added to the document later to make it clearer. Thanks for your reply!
Hello, is this patch OK? Thanks. yong w <yongw.pur@gmail.com> 于2021年12月8日周三 00:50写道: > > Balbir Singh <bsingharora@gmail.com> 于2021年12月7日周二 13:16写道: > > > > On Sun, Dec 05, 2021 at 07:08:02PM +0800, yong w wrote: > > > Balbir Singh <bsingharora@gmail.com> 于2021年12月5日周日 16:17写道: > > > > > > > > On Sat, Dec 04, 2021 at 04:09:55AM -0800, yongw.pur@gmail.com wrote: > > > > > From: wangyong <wang.yong12@zte.com.cn> > > > > > > > > > > Delay accounting does not track the delay of memory compact. > > > > > When there is not enough free memory, tasks can spend > > > > > a amount of their time waiting for compact. > > > > > > > > > > To get the impact of tasks in direct memory compact, measure > > > > > the delay when allocating memory through memory compact. > > > > > > > > > > > > > Should we call this DIRECT_COMPACT and through documentation > > > > or name change imply that this won't work for kcompactd the > > > > kernel thread - based on my reading of the patches. > > > > > > > Using DIRECT_COMPACT is a little redundant,because the > > > delayacct stats of delay accounting is specific to tasks, it has > > > nothing to do with kcompactd, which is similar to the RECLAIM field. > > > > > > > What would we expect when we call delayacct -p <pidof kcompactd> > > to be output? > If the slow path of memory allocation is invoked in the kcompacd process, > there may be delays being recorded. > > > Don't feel to strongly, but it can be confusing that kcompactd > > has spent no time in compact'ing? Not that delayacct is used for > > kernel threads, but I am not sure if that use case exists today. > Yes, delayacct does not restrict the process of obtaining information, > but kcompactd is used for compaction, the compact delay of > kcompatd is not actually a delay.Maybe it can be added to the > document later to make it clearer. > > Thanks for your reply!
On Mon, Dec 13, 2021 at 09:56:08PM +0800, yong w wrote: > Hello, is this patch OK? > > Thanks. > > yong w <yongw.pur@gmail.com> 于2021年12月8日周三 00:50写道: > > > > Balbir Singh <bsingharora@gmail.com> 于2021年12月7日周二 13:16写道: > > > > > > On Sun, Dec 05, 2021 at 07:08:02PM +0800, yong w wrote: > > > > Balbir Singh <bsingharora@gmail.com> 于2021年12月5日周日 16:17写道: > > > > > > > > > > On Sat, Dec 04, 2021 at 04:09:55AM -0800, yongw.pur@gmail.com wrote: > > > > > > From: wangyong <wang.yong12@zte.com.cn> > > > > > > > > > > > > Delay accounting does not track the delay of memory compact. > > > > > > When there is not enough free memory, tasks can spend > > > > > > a amount of their time waiting for compact. > > > > > > > > > > > > To get the impact of tasks in direct memory compact, measure > > > > > > the delay when allocating memory through memory compact. > > > > > > > > > > > > > > > > Should we call this DIRECT_COMPACT and through documentation > > > > > or name change imply that this won't work for kcompactd the > > > > > kernel thread - based on my reading of the patches. > > > > > > > > > Using DIRECT_COMPACT is a little redundant,because the > > > > delayacct stats of delay accounting is specific to tasks, it has > > > > nothing to do with kcompactd, which is similar to the RECLAIM field. > > > > > > > > > > What would we expect when we call delayacct -p <pidof kcompactd> > > > to be output? > > If the slow path of memory allocation is invoked in the kcompacd process, > > there may be delays being recorded. > > > > > Don't feel to strongly, but it can be confusing that kcompactd > > > has spent no time in compact'ing? Not that delayacct is used for > > > kernel threads, but I am not sure if that use case exists today. > > Yes, delayacct does not restrict the process of obtaining information, > > but kcompactd is used for compaction, the compact delay of > > kcompatd is not actually a delay.Maybe it can be added to the > > document later to make it clearer. > > > > Thanks for your reply! Please avoid top posting, when you say added later, I presume more patches for documentation are coming. I am OK with the patch in that case. Reviewed-by: Balbir Singh <bsingharora@gmail.com> Balbir Singh
Balbir Singh <bsingharora@gmail.com> 于2021年12月14日周二 13:49写道: > > On Mon, Dec 13, 2021 at 09:56:08PM +0800, yong w wrote: > > Hello, is this patch OK? > > > > Thanks. > > > > yong w <yongw.pur@gmail.com> 于2021年12月8日周三 00:50写道: > > > > > > Balbir Singh <bsingharora@gmail.com> 于2021年12月7日周二 13:16写道: > > > > > > > > On Sun, Dec 05, 2021 at 07:08:02PM +0800, yong w wrote: > > > > > Balbir Singh <bsingharora@gmail.com> 于2021年12月5日周日 16:17写道: > > > > > > > > > > > > On Sat, Dec 04, 2021 at 04:09:55AM -0800, yongw.pur@gmail.com wrote: > > > > > > > From: wangyong <wang.yong12@zte.com.cn> > > > > > > > > > > > > > > Delay accounting does not track the delay of memory compact. > > > > > > > When there is not enough free memory, tasks can spend > > > > > > > a amount of their time waiting for compact. > > > > > > > > > > > > > > To get the impact of tasks in direct memory compact, measure > > > > > > > the delay when allocating memory through memory compact. > > > > > > > > > > > > > > > > > > > Should we call this DIRECT_COMPACT and through documentation > > > > > > or name change imply that this won't work for kcompactd the > > > > > > kernel thread - based on my reading of the patches. > > > > > > > > > > > Using DIRECT_COMPACT is a little redundant,because the > > > > > delayacct stats of delay accounting is specific to tasks, it has > > > > > nothing to do with kcompactd, which is similar to the RECLAIM field. > > > > > > > > > > > > > What would we expect when we call delayacct -p <pidof kcompactd> > > > > to be output? > > > If the slow path of memory allocation is invoked in the kcompacd process, > > > there may be delays being recorded. > > > > > > > Don't feel to strongly, but it can be confusing that kcompactd > > > > has spent no time in compact'ing? Not that delayacct is used for > > > > kernel threads, but I am not sure if that use case exists today. > > > Yes, delayacct does not restrict the process of obtaining information, > > > but kcompactd is used for compaction, the compact delay of > > > kcompatd is not actually a delay.Maybe it can be added to the > > > document later to make it clearer. > > > > > > Thanks for your reply! > > Please avoid top posting, when you say added later, I presume more > patches for documentation are coming. I am OK with the patch in that > case. > > Reviewed-by: Balbir Singh <bsingharora@gmail.com> > > Balbir Singh OK,Thank you!
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 435c365..3e03d01 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -42,8 +42,12 @@ struct task_delay_info { u64 thrashing_start; u64 thrashing_delay; /* wait for thrashing page */ + u64 compact_start; + u64 compact_delay; /* wait for memory compact */ + u32 freepages_count; /* total count of memory reclaim */ u32 thrashing_count; /* total count of thrash waits */ + u32 compact_count; /* total count of memory compact */ }; #endif @@ -72,6 +76,8 @@ extern void __delayacct_thrashing_start(void); extern void __delayacct_thrashing_end(void); extern void __delayacct_swapin_start(void); extern void __delayacct_swapin_end(void); +extern void __delayacct_compact_start(void); +extern void __delayacct_compact_end(void); static inline void delayacct_tsk_init(struct task_struct *tsk) { @@ -170,6 +176,24 @@ static inline void delayacct_swapin_end(void) __delayacct_swapin_end(); } +static inline void delayacct_compact_start(void) +{ + if (!static_branch_unlikely(&delayacct_key)) + return; + + if (current->delays) + __delayacct_compact_start(); +} + +static inline void delayacct_compact_end(void) +{ + if (!static_branch_unlikely(&delayacct_key)) + return; + + if (current->delays) + __delayacct_compact_end(); +} + #else static inline void delayacct_init(void) {} @@ -200,6 +224,10 @@ static inline void delayacct_swapin_start(void) {} static inline void delayacct_swapin_end(void) {} +static inline void delayacct_compact_start(void) +{} +static inline void delayacct_compact_end(void) +{} #endif /* CONFIG_TASK_DELAY_ACCT */ diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h index ccbd087..12327d3 100644 --- a/include/uapi/linux/taskstats.h +++ b/include/uapi/linux/taskstats.h @@ -34,7 +34,7 @@ */ -#define TASKSTATS_VERSION 10 +#define TASKSTATS_VERSION 11 #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN * in linux/sched.h */ @@ -172,6 +172,10 @@ struct taskstats { /* v10: 64-bit btime to avoid overflow */ __u64 ac_btime64; /* 64-bit begin time */ + + /* Delay waiting for memory compact */ + __u64 compact_count; + __u64 compact_delay_total; }; diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 11f3cd8..c5e8cea 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -155,10 +155,13 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp; tmp = d->thrashing_delay_total + tsk->delays->thrashing_delay; d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp; + tmp = d->compact_delay_total + tsk->delays->compact_delay; + d->compact_delay_total = (tmp < d->compact_delay_total) ? 0 : tmp; d->blkio_count += tsk->delays->blkio_count; d->swapin_count += tsk->delays->swapin_count; d->freepages_count += tsk->delays->freepages_count; d->thrashing_count += tsk->delays->thrashing_count; + d->compact_count += tsk->delays->compact_count; raw_spin_unlock_irqrestore(&tsk->delays->lock, flags); return 0; @@ -214,3 +217,15 @@ void __delayacct_swapin_end(void) ¤t->delays->swapin_count); } +void __delayacct_compact_start(void) +{ + current->delays->compact_start = local_clock(); +} + +void __delayacct_compact_end(void) +{ + delayacct_end(¤t->delays->lock, + ¤t->delays->compact_start, + ¤t->delays->compact_delay, + ¤t->delays->compact_count); +} diff --git a/mm/page_alloc.c b/mm/page_alloc.c index edfd6c8..6430226 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -74,6 +74,7 @@ #include <linux/padata.h> #include <linux/khugepaged.h> #include <linux/buffer_head.h> +#include <linux/delayacct.h> #include <asm/sections.h> #include <asm/tlbflush.h> #include <asm/div64.h> @@ -4363,6 +4364,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, return NULL; psi_memstall_enter(&pflags); + delayacct_compact_start(); noreclaim_flag = memalloc_noreclaim_save(); *compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac, @@ -4370,6 +4372,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, memalloc_noreclaim_restore(noreclaim_flag); psi_memstall_leave(&pflags); + delayacct_compact_end(); if (*compact_result == COMPACT_SKIPPED) return NULL; diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c index 5ef1c15..11e8673 100644 --- a/tools/accounting/getdelays.c +++ b/tools/accounting/getdelays.c @@ -205,6 +205,8 @@ static void print_delayacct(struct taskstats *t) "RECLAIM %12s%15s%15s\n" " %15llu%15llu%15llums\n" "THRASHING%12s%15s%15s\n" + " %15llu%15llu%15llums\n" + "COMPACT %12s%15s%15s\n" " %15llu%15llu%15llums\n", "count", "real total", "virtual total", "delay total", "delay average", @@ -228,7 +230,11 @@ static void print_delayacct(struct taskstats *t) "count", "delay total", "delay average", (unsigned long long)t->thrashing_count, (unsigned long long)t->thrashing_delay_total, - average_ms(t->thrashing_delay_total, t->thrashing_count)); + average_ms(t->thrashing_delay_total, t->thrashing_count), + "count", "delay total", "delay average", + (unsigned long long)t->compact_count, + (unsigned long long)t->compact_delay_total, + average_ms(t->compact_delay_total, t->compact_count)); } static void task_context_switch_counts(struct taskstats *t)