Message ID | 20210505111525.121458839@infradead.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | sched,delayacct: Some cleanups | expand |
On Wed, May 05, 2021 at 12:59:43PM +0200, Peter Zijlstra wrote: > The situation around sched_info is somewhat complicated, it is used by > sched_stats and delayacct and, indirectly, kvm. > > If SCHEDSTATS=Y (but disabled by default) sched_info_on() is > unconditionally true -- this is the case for all distro kernel configs > I checked. > > If for some reason SCHEDSTATS=N, but TASK_DELAY_ACCT=Y, then > sched_info_on() can return false when delayacct is disabled, > presumably because there would be no other users left; except kvm is. > > Instead of complicating matters further by accurately accounting > sched_stat and kvm state, simply unconditionally enable when > SCHED_INFO=Y, matching the common distro case. > > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Acked-by: Johannes Weiner <hannes@cmpxchg.org>
On Wed, May 05, 2021 at 12:59:43PM +0200, Peter Zijlstra wrote: > The situation around sched_info is somewhat complicated, it is used by > sched_stats and delayacct and, indirectly, kvm. > > If SCHEDSTATS=Y (but disabled by default) sched_info_on() is > unconditionally true -- this is the case for all distro kernel configs > I checked. > > If for some reason SCHEDSTATS=N, but TASK_DELAY_ACCT=Y, then > sched_info_on() can return false when delayacct is disabled, > presumably because there would be no other users left; except kvm is. > > Instead of complicating matters further by accurately accounting > sched_stat and kvm state, simply unconditionally enable when > SCHED_INFO=Y, matching the common distro case. > > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> > @@ -163,13 +158,12 @@ static inline void sched_info_reset_dequ > */ > static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t) > { > - unsigned long long now = rq_clock(rq), delta = 0; > + unsigned long long delta = 0; > > - if (sched_info_on()) { > - if (t->sched_info.last_queued) > - delta = now - t->sched_info.last_queued; > + if (t->sched_info.last_queued) { > + delta = rq_clock(rq) - t->sched_info.last_queued; > + t->sched_info.last_queued = 0; > } > - sched_info_reset_dequeued(t); > t->sched_info.run_delay += delta; > > rq_sched_info_dequeue(rq, delta); As delta is !0 iff t->sched_info.last_queued, why not this? diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index 33ffd41935ba..37e33c0eeb7c 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -158,15 +158,14 @@ static inline void psi_sched_switch(struct task_struct *prev, */ static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t) { - unsigned long long delta = 0; - if (t->sched_info.last_queued) { + unsigned long long delta; + delta = rq_clock(rq) - t->sched_info.last_queued; t->sched_info.last_queued = 0; + t->sched_info.run_delay += delta; + rq_sched_info_dequeue(rq, delta); } - t->sched_info.run_delay += delta; - - rq_sched_info_dequeue(rq, delta); } /* > @@ -184,9 +178,10 @@ static void sched_info_arrive(struct rq > { > unsigned long long now = rq_clock(rq), delta = 0; > > - if (t->sched_info.last_queued) > + if (t->sched_info.last_queued) { > delta = now - t->sched_info.last_queued; > - sched_info_reset_dequeued(t); > + t->sched_info.last_queued = 0; > + } > t->sched_info.run_delay += delta; > t->sched_info.last_arrival = now; > t->sched_info.pcount++; Similarly @@ -176,17 +175,18 @@ static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t) */ static void sched_info_arrive(struct rq *rq, struct task_struct *t) { - unsigned long long now = rq_clock(rq), delta = 0; + unsigned long long now = rq_clock(rq); if (t->sched_info.last_queued) { + unsigned long long delta; + delta = now - t->sched_info.last_queued; t->sched_info.last_queued = 0; + t->sched_info.run_delay += delta; + rq_sched_info_arrive(rq, delta); } - t->sched_info.run_delay += delta; t->sched_info.last_arrival = now; t->sched_info.pcount++; - - rq_sched_info_arrive(rq, delta); } /*
On Wed, May 12, 2021 at 12:10:40PM +0100, Mel Gorman wrote: > As delta is !0 iff t->sched_info.last_queued, why not this? > > diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h > index 33ffd41935ba..37e33c0eeb7c 100644 > --- a/kernel/sched/stats.h > +++ b/kernel/sched/stats.h > @@ -158,15 +158,14 @@ static inline void psi_sched_switch(struct task_struct *prev, > */ > static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t) > { > - unsigned long long delta = 0; > - > if (t->sched_info.last_queued) { > + unsigned long long delta; > + > delta = rq_clock(rq) - t->sched_info.last_queued; > t->sched_info.last_queued = 0; > + t->sched_info.run_delay += delta; > + rq_sched_info_dequeue(rq, delta); > } > - t->sched_info.run_delay += delta; > - > - rq_sched_info_dequeue(rq, delta); > } Right.. clearly I missed the obvious there.. Lemme go add another patch on top of the lot. Something like this I suppose. --- diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index 33ffd41935ba..111072ee9663 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -160,10 +160,11 @@ static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t) { unsigned long long delta = 0; - if (t->sched_info.last_queued) { - delta = rq_clock(rq) - t->sched_info.last_queued; - t->sched_info.last_queued = 0; - } + if (!t->sched_info.last_queued) + return; + + delta = rq_clock(rq) - t->sched_info.last_queued; + t->sched_info.last_queued = 0; t->sched_info.run_delay += delta; rq_sched_info_dequeue(rq, delta); @@ -176,12 +177,14 @@ static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t) */ static void sched_info_arrive(struct rq *rq, struct task_struct *t) { - unsigned long long now = rq_clock(rq), delta = 0; + unsigned long long now, delta = 0; - if (t->sched_info.last_queued) { - delta = now - t->sched_info.last_queued; - t->sched_info.last_queued = 0; - } + if (!t->sched_info.last_queued) + return; + + now = rq_clock(rq); + delta = now - t->sched_info.last_queued; + t->sched_info.last_queued = 0; t->sched_info.run_delay += delta; t->sched_info.last_arrival = now; t->sched_info.pcount++;
On Wed, May 12, 2021 at 01:32:03PM +0200, Peter Zijlstra wrote: > On Wed, May 12, 2021 at 12:10:40PM +0100, Mel Gorman wrote: > > As delta is !0 iff t->sched_info.last_queued, why not this? > > > > diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h > > index 33ffd41935ba..37e33c0eeb7c 100644 > > --- a/kernel/sched/stats.h > > +++ b/kernel/sched/stats.h > > @@ -158,15 +158,14 @@ static inline void psi_sched_switch(struct task_struct *prev, > > */ > > static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t) > > { > > - unsigned long long delta = 0; > > - > > if (t->sched_info.last_queued) { > > + unsigned long long delta; > > + > > delta = rq_clock(rq) - t->sched_info.last_queued; > > t->sched_info.last_queued = 0; > > + t->sched_info.run_delay += delta; > > + rq_sched_info_dequeue(rq, delta); > > } > > - t->sched_info.run_delay += delta; > > - > > - rq_sched_info_dequeue(rq, delta); > > } > > Right.. clearly I missed the obvious there.. Lemme go add another patch > on top of the lot. > > Something like this I suppose. > That works for me.
--- a/include/linux/sched/stat.h +++ b/include/linux/sched/stat.h @@ -3,6 +3,7 @@ #define _LINUX_SCHED_STAT_H #include <linux/percpu.h> +#include <linux/kconfig.h> /* * Various counters maintained by the scheduler and fork(), @@ -23,14 +24,7 @@ extern unsigned long nr_iowait_cpu(int c static inline int sched_info_on(void) { -#ifdef CONFIG_SCHEDSTATS - return 1; -#elif defined(CONFIG_TASK_DELAY_ACCT) - extern int delayacct_on; - return delayacct_on; -#else - return 0; -#endif + return IS_ENABLED(CONFIG_SCHED_INFO); } #ifdef CONFIG_SCHEDSTATS --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -15,7 +15,6 @@ #include <linux/module.h> int delayacct_on __read_mostly = 1; /* Delay accounting turned on/off */ -EXPORT_SYMBOL_GPL(delayacct_on); struct kmem_cache *delayacct_cache; static int __init delayacct_setup_disable(char *str) --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -150,11 +150,6 @@ static inline void psi_sched_switch(stru #endif /* CONFIG_PSI */ #ifdef CONFIG_SCHED_INFO -static inline void sched_info_reset_dequeued(struct task_struct *t) -{ - t->sched_info.last_queued = 0; -} - /* * We are interested in knowing how long it was from the *first* time a * task was queued to the time that it finally hit a CPU, we call this routine @@ -163,13 +158,12 @@ static inline void sched_info_reset_dequ */ static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t) { - unsigned long long now = rq_clock(rq), delta = 0; + unsigned long long delta = 0; - if (sched_info_on()) { - if (t->sched_info.last_queued) - delta = now - t->sched_info.last_queued; + if (t->sched_info.last_queued) { + delta = rq_clock(rq) - t->sched_info.last_queued; + t->sched_info.last_queued = 0; } - sched_info_reset_dequeued(t); t->sched_info.run_delay += delta; rq_sched_info_dequeue(rq, delta); @@ -184,9 +178,10 @@ static void sched_info_arrive(struct rq { unsigned long long now = rq_clock(rq), delta = 0; - if (t->sched_info.last_queued) + if (t->sched_info.last_queued) { delta = now - t->sched_info.last_queued; - sched_info_reset_dequeued(t); + t->sched_info.last_queued = 0; + } t->sched_info.run_delay += delta; t->sched_info.last_arrival = now; t->sched_info.pcount++; @@ -201,10 +196,8 @@ static void sched_info_arrive(struct rq */ static inline void sched_info_enqueue(struct rq *rq, struct task_struct *t) { - if (sched_info_on()) { - if (!t->sched_info.last_queued) - t->sched_info.last_queued = rq_clock(rq); - } + if (!t->sched_info.last_queued) + t->sched_info.last_queued = rq_clock(rq); } /* @@ -231,7 +224,7 @@ static inline void sched_info_depart(str * the idle task.) We are only called when prev != next. */ static inline void -__sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next) +sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next) { /* * prev now departs the CPU. It's not interesting to record @@ -245,18 +238,8 @@ __sched_info_switch(struct rq *rq, struc sched_info_arrive(rq, next); } -static inline void -sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next) -{ - if (sched_info_on()) - __sched_info_switch(rq, prev, next); -} - #else /* !CONFIG_SCHED_INFO: */ # define sched_info_enqueue(rq, t) do { } while (0) -# define sched_info_reset_dequeued(t) do { } while (0) # define sched_info_dequeue(rq, t) do { } while (0) -# define sched_info_depart(rq, t) do { } while (0) -# define sched_info_arrive(rq, next) do { } while (0) # define sched_info_switch(rq, t, next) do { } while (0) #endif /* CONFIG_SCHED_INFO */
The situation around sched_info is somewhat complicated, it is used by sched_stats and delayacct and, indirectly, kvm. If SCHEDSTATS=Y (but disabled by default) sched_info_on() is unconditionally true -- this is the case for all distro kernel configs I checked. If for some reason SCHEDSTATS=N, but TASK_DELAY_ACCT=Y, then sched_info_on() can return false when delayacct is disabled, presumably because there would be no other users left; except kvm is. Instead of complicating matters further by accurately accounting sched_stat and kvm state, simply unconditionally enable when SCHED_INFO=Y, matching the common distro case. Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> --- include/linux/sched/stat.h | 10 ++-------- kernel/delayacct.c | 1 - kernel/sched/stats.h | 37 ++++++++++--------------------------- 3 files changed, 12 insertions(+), 36 deletions(-)