Message ID | 20240731083655.375293-5-tglozar@redhat.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | rtla: Support idle state disabling via libcpupower in timerlat | expand |
On Wed, 31 Jul 2024 10:36:53 +0200 tglozar@redhat.com wrote: > From: Tomas Glozar <tglozar@redhat.com> > > Add option to limit deepest idle state on CPUs where timerlat is running > for the duration of the workload. > > Signed-off-by: Tomas Glozar <tglozar@redhat.com> > --- > tools/tracing/rtla/src/timerlat_top.c | 46 ++++++++++++++++++++++++++- > 1 file changed, 45 insertions(+), 1 deletion(-) > > diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c > index 8c16419fe22a..ef1d3affef95 100644 > --- a/tools/tracing/rtla/src/timerlat_top.c > +++ b/tools/tracing/rtla/src/timerlat_top.c > @@ -48,6 +48,7 @@ struct timerlat_top_params { > int pretty_output; > int warmup; > int buffer_size; > + int deepest_idle_state; > cpu_set_t hk_cpu_set; > struct sched_attr sched_param; > struct trace_events *events; > @@ -447,7 +448,7 @@ static void timerlat_top_usage(char *usage) > "", > " usage: rtla timerlat [top] [-h] [-q] [-a us] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] \\", > " [[-t[file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\", > - " [-P priority] [--dma-latency us] [--aa-only us] [-C[=cgroup_name]] [-u|-k] [--warm-up s]", > + " [-P priority] [--dma-latency us] [--aa-only us] [-C[=cgroup_name]] [-u|-k] [--warm-up s] [--deepest-idle-state n]", > "", > " -h/--help: print this menu", > " -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit", > @@ -481,6 +482,7 @@ static void timerlat_top_usage(char *usage) > " -U/--user-load: enable timerlat for user-defined user-space workload", > " --warm-up s: let the workload run for s seconds before collecting data", > " --trace-buffer-size kB: set the per-cpu trace buffer size in kB", Could probably do: #ifdef HAVE_LIBCPUPOWER_SUPPORT > + " --deepest-idle-state n: only go down to idle state n on cpus used by timerlat to reduce exit from idle latency", #else + " --deepest-idle-state n: [rtla built without 
libcpupower, --deepest-idle-state is not supported]", #endif > NULL, > }; > > @@ -518,6 +520,9 @@ static struct timerlat_top_params > /* disabled by default */ > params->dma_latency = -1; > > + /* disabled by default */ > + params->deepest_idle_state = -2; > + > /* display data in microseconds */ > params->output_divisor = 1000; > > @@ -550,6 +555,7 @@ static struct timerlat_top_params > {"aa-only", required_argument, 0, '5'}, > {"warm-up", required_argument, 0, '6'}, > {"trace-buffer-size", required_argument, 0, '7'}, > + {"deepest-idle-state", required_argument, 0, '8'}, > {0, 0, 0, 0} > }; > > @@ -726,6 +732,9 @@ static struct timerlat_top_params > case '7': > params->buffer_size = get_llong_from_str(optarg); > break; > + case '8': > + params->deepest_idle_state = get_llong_from_str(optarg); > + break; > default: > timerlat_top_usage("Invalid option"); > } > @@ -922,6 +931,9 @@ int timerlat_top_main(int argc, char *argv[]) > int return_value = 1; > char *max_lat; > int retval; > +#ifdef HAVE_LIBCPUPOWER_SUPPORT > + int i; > +#endif /* HAVE_LIBCPUPOWER_SUPPORT */ > > params = timerlat_top_parse_args(argc, argv); > if (!params) > @@ -971,6 +983,26 @@ int timerlat_top_main(int argc, char *argv[]) > } > } > > + if (params->deepest_idle_state >= -1) { > +#ifdef HAVE_LIBCPUPOWER_SUPPORT > + for (i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i++) { > + if (params->cpus && !CPU_ISSET(i, &params->monitored_cpus)) > + continue; > + if (save_cpu_idle_disable_state(i) < 0) { > + err_msg("Could not save cpu idle state.\n"); > + goto out_free; > + } > + if (set_deepest_cpu_idle_state(i, params->deepest_idle_state) < 0) { > + err_msg("Could not set deepest cpu idle state.\n"); > + goto out_free; > + } > + } > +#else > + err_msg("rtla built without libcpupower, --deepest-idle-state is not supported\n"); > + goto out_free; > +#endif /* HAVE_LIBCPUPOWER_SUPPORT */ We could get rid of most of the ifdefs if you changed the header file to be: #ifdef HAVE_LIBCPUPOWER_SUPPORT int
save_cpu_idle_disable_state(unsigned int cpu); int restore_cpu_idle_disable_state(unsigned int cpu); void free_cpu_idle_disable_states(void); int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int state); static inline int have_libcpower_support(void) { return 1; } #else static inline int save_cpu_idle_disable_state(unsigned int cpu) { return -1; } static inline int restore_cpu_idle_disable_state(unsigned int cpu) { return -1; } static inline void free_cpu_idle_disable_states(void) { } static inline int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int state) { return -1; } static inline int have_libcpower_support(void) { return 0; } #endif /* HAVE_LIBCPUPOWER_SUPPORT */ Then the above can simply be: if (params->deepest_idle_state >= -1) { if (!have_libcpower_support()) { err_msg("rtla built without libcpupower, --deepest-idle-state is not supported\n"); goto out_free; } for (i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i++) { if (params->cpus && !CPU_ISSET(i, &params->monitored_cpus)) continue; if (save_cpu_idle_disable_state(i) < 0) { err_msg("Could not save cpu idle state.\n"); goto out_free; } if (set_deepest_cpu_idle_state(i, params->deepest_idle_state) < 0) { err_msg("Could not set deepest cpu idle state.\n"); goto out_free; } } Makes the code much nicer to look at. > + } > + > if (params->trace_output) { > record = osnoise_init_trace_tool("timerlat"); > if (!record) { > @@ -1125,6 +1157,15 @@ int timerlat_top_main(int argc, char *argv[]) > timerlat_aa_destroy(); > if (dma_latency_fd >= 0) > close(dma_latency_fd); > +#ifdef HAVE_LIBCPUPOWER_SUPPORT > + if (params->deepest_idle_state >= -1) { > + for (i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i++) { You would think gcc may optimize it, but I don't have that much confidence it can or would. You may want to change that to: int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); for (i = 0; i < nr_cpus; i++) { Otherwise you may be calling that sysconf() for each iteration of the loop.
-- Steve > + if (params->cpus && !CPU_ISSET(i, &params->monitored_cpus)) > + continue; > + restore_cpu_idle_disable_state(i); > + } > + } > +#endif /* HAVE_LIBCPUPOWER_SUPPORT */ > trace_events_destroy(&record->trace, params->events); > params->events = NULL; > out_free: > @@ -1134,6 +1175,9 @@ int timerlat_top_main(int argc, char *argv[]) > osnoise_destroy_tool(record); > osnoise_destroy_tool(top); > free(params); > +#ifdef HAVE_LIBCPUPOWER_SUPPORT > + free_cpu_idle_disable_states(); > +#endif /* HAVE_LIBCPUPOWER_SUPPORT */ > out_exit: > exit(return_value); > }
> > Could probably do: > > #ifdef HAVE_LIBCPUPOWER_SUPPORT > > + " --deepest-idle-state n: only go down to idle state n on cpus used by timerlat to reduce exit from idle latency", > #else > + " --deepest-idle-state n: [rtla built without libcpupower, --deepest-idle-state is not supported]", > #endif > > > NULL, > > }; > > I would still include what the option does, even if not building with libcpupower. I'm not too sure if the help is the place to say the option is unsupported either. I see two perspectives on this matter: one is that the binary does not support libcpupower and should state it in the help, the other is that the version of rtla as a whole does support it, you are just using a build that does not, and as such it should be in the help (you will know it is unsupported when trying to use the option). I suppose we can add a note like this, keeping the help message to inform the user what the option does so that they will rebuild if they want to use it: ``` #ifdef HAVE_LIBCPUPOWER_SUPPORT " --deepest-idle-state n: only go down to idle state n on cpus used by timerlat to reduce exit from idle latency", #else " --deepest-idle-state n: only go down to idle state n on cpus used by timerlat to reduce exit from idle latency (not supported due to rtla built without libcpupower)", #endif ``` What do you think about that? > > We could get rid of most of the ifdefs if you changed the header file to be: > That's a neat idea, thank you! I know this approach (with defining functions that do nothing when some feature is unavailable) is very commonly used in the kernel to keep the API consistent across different configs, but I didn't think about using it like this here in rtla. > >You would think gcc may optimize it, but I don't have that much confidence >it can or would. You may want to change that to: > > int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); > > for (i = 0; i < nr_cpus; i++) { > >Otherwise you may be calling that sysconf() for each iteration of the loop.
> Nah, that is simply an oversight. If GCC optimized that, it would be in fact a GCC bug, since the value of sysconf(_SC_NPROCESSORS_CONF) comes from the external environment and can technically change during the runtime of a program (think of CRIU live migration of the process from one machine to another with a different number of CPUs). Tomas
diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c index 8c16419fe22a..ef1d3affef95 100644 --- a/tools/tracing/rtla/src/timerlat_top.c +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -48,6 +48,7 @@ struct timerlat_top_params { int pretty_output; int warmup; int buffer_size; + int deepest_idle_state; cpu_set_t hk_cpu_set; struct sched_attr sched_param; struct trace_events *events; @@ -447,7 +448,7 @@ static void timerlat_top_usage(char *usage) "", " usage: rtla timerlat [top] [-h] [-q] [-a us] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] \\", " [[-t[file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\", - " [-P priority] [--dma-latency us] [--aa-only us] [-C[=cgroup_name]] [-u|-k] [--warm-up s]", + " [-P priority] [--dma-latency us] [--aa-only us] [-C[=cgroup_name]] [-u|-k] [--warm-up s] [--deepest-idle-state n]", "", " -h/--help: print this menu", " -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit", @@ -481,6 +482,7 @@ static void timerlat_top_usage(char *usage) " -U/--user-load: enable timerlat for user-defined user-space workload", " --warm-up s: let the workload run for s seconds before collecting data", " --trace-buffer-size kB: set the per-cpu trace buffer size in kB", + " --deepest-idle-state n: only go down to idle state n on cpus used by timerlat to reduce exit from idle latency", NULL, }; @@ -518,6 +520,9 @@ static struct timerlat_top_params /* disabled by default */ params->dma_latency = -1; + /* disabled by default */ + params->deepest_idle_state = -2; + /* display data in microseconds */ params->output_divisor = 1000; @@ -550,6 +555,7 @@ static struct timerlat_top_params {"aa-only", required_argument, 0, '5'}, {"warm-up", required_argument, 0, '6'}, {"trace-buffer-size", required_argument, 0, '7'}, + {"deepest-idle-state", required_argument, 0, '8'}, {0, 0, 0, 0} }; @@ -726,6 +732,9 @@ static struct timerlat_top_params case 
'7': params->buffer_size = get_llong_from_str(optarg); break; + case '8': + params->deepest_idle_state = get_llong_from_str(optarg); + break; default: timerlat_top_usage("Invalid option"); } @@ -922,6 +931,9 @@ int timerlat_top_main(int argc, char *argv[]) int return_value = 1; char *max_lat; int retval; +#ifdef HAVE_LIBCPUPOWER_SUPPORT + int i; +#endif /* HAVE_LIBCPUPOWER_SUPPORT */ params = timerlat_top_parse_args(argc, argv); if (!params) @@ -971,6 +983,26 @@ int timerlat_top_main(int argc, char *argv[]) } } + if (params->deepest_idle_state >= -1) { +#ifdef HAVE_LIBCPUPOWER_SUPPORT + for (i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i++) { + if (params->cpus && !CPU_ISSET(i, &params->monitored_cpus)) + continue; + if (save_cpu_idle_disable_state(i) < 0) { + err_msg("Could not save cpu idle state.\n"); + goto out_free; + } + if (set_deepest_cpu_idle_state(i, params->deepest_idle_state) < 0) { + err_msg("Could not set deepest cpu idle state.\n"); + goto out_free; + } + } +#else + err_msg("rtla built without libcpupower, --deepest-idle-state is not supported\n"); + goto out_free; +#endif /* HAVE_LIBCPUPOWER_SUPPORT */ + } + if (params->trace_output) { record = osnoise_init_trace_tool("timerlat"); if (!record) { @@ -1125,6 +1157,15 @@ int timerlat_top_main(int argc, char *argv[]) timerlat_aa_destroy(); if (dma_latency_fd >= 0) close(dma_latency_fd); +#ifdef HAVE_LIBCPUPOWER_SUPPORT + if (params->deepest_idle_state >= -1) { + for (i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i++) { + if (params->cpus && !CPU_ISSET(i, &params->monitored_cpus)) + continue; + restore_cpu_idle_disable_state(i); + } + } +#endif /* HAVE_LIBCPUPOWER_SUPPORT */ trace_events_destroy(&record->trace, params->events); params->events = NULL; out_free: @@ -1134,6 +1175,9 @@ int timerlat_top_main(int argc, char *argv[]) osnoise_destroy_tool(record); osnoise_destroy_tool(top); free(params); +#ifdef HAVE_LIBCPUPOWER_SUPPORT + free_cpu_idle_disable_states(); +#endif /* HAVE_LIBCPUPOWER_SUPPORT */ out_exit:
exit(return_value); }