Message ID | 1389897145-3479-1-git-send-email-dirk.j.brandewie@intel.com (mailing list archive) |
---|---|
State | Accepted, archived |
Headers | show |
On Thursday, January 16, 2014 10:32:25 AM dirk.brandewie@gmail.com wrote:
> From: Dirk Brandewie <dirk.j.brandewie@intel.com>
>
> Add perf trace event "power:pstate_sample" to report driver state to
> aid in diagnosing issues reported against intel_pstate.
>
> Signed-off-by: Dirk Brandewie <dirk.j.brandewie@intel.com>

Queued up for 3.14, thanks!

> ---
>  drivers/cpufreq/intel_pstate.c | 24 +++++++++++++++++++
>  include/trace/events/power.h | 53 ++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 77 insertions(+)
>
> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
> index fe91dad..7e257b2 100644
> --- a/drivers/cpufreq/intel_pstate.c
> +++ b/drivers/cpufreq/intel_pstate.c
> @@ -51,6 +51,8 @@ static inline int32_t div_fp(int32_t x, int32_t y)
>  	return div_s64((int64_t)x << FRAC_BITS, (int64_t)y);
>  }
>
> +static u64 energy_divisor;
> +
>  struct sample {
>  	int32_t core_pct_busy;
>  	u64 aperf;
> @@ -559,6 +561,7 @@ static inline void intel_pstate_sample(struct cpudata *cpu)
>
>  	rdmsrl(MSR_IA32_APERF, aperf);
>  	rdmsrl(MSR_IA32_MPERF, mperf);
> +
>  	cpu->sample_ptr = (cpu->sample_ptr + 1) % SAMPLE_COUNT;
>  	cpu->samples[cpu->sample_ptr].aperf = aperf;
>  	cpu->samples[cpu->sample_ptr].mperf = mperf;
> @@ -603,6 +606,7 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
>  	ctl = pid_calc(pid, busy_scaled);
>
>  	steps = abs(ctl);
> +
>  	if (ctl < 0)
>  		intel_pstate_pstate_increase(cpu, steps);
>  	else
> @@ -612,9 +616,24 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
>  static void intel_pstate_timer_func(unsigned long __data)
>  {
>  	struct cpudata *cpu = (struct cpudata *) __data;
> +	struct sample *sample;
> +	u64 energy;
>
>  	intel_pstate_sample(cpu);
> +
> +	sample = &cpu->samples[cpu->sample_ptr];
> +	rdmsrl(MSR_PKG_ENERGY_STATUS, energy);
> +
>  	intel_pstate_adjust_busy_pstate(cpu);
> +
> +	trace_pstate_sample(fp_toint(sample->core_pct_busy),
> +			fp_toint(intel_pstate_get_scaled_busy(cpu)),
> +			cpu->pstate.current_pstate,
> +			sample->mperf,
> +			sample->aperf,
> +			div64_u64(energy, energy_divisor),
> +			sample->freq);
> +
>  	intel_pstate_set_sample_time(cpu);
>  }
>
> @@ -894,6 +913,7 @@ static int __init intel_pstate_init(void)
>  	int cpu, rc = 0;
>  	const struct x86_cpu_id *id;
>  	struct cpu_defaults *cpu_info;
> +	u64 units;
>
>  	if (no_load)
>  		return -ENODEV;
> @@ -927,8 +947,12 @@ static int __init intel_pstate_init(void)
>  	if (rc)
>  		goto out;
>
> +	rdmsrl(MSR_RAPL_POWER_UNIT, units);
> +	energy_divisor = 1 << ((units >> 8) & 0x1f); /* bits{12:8} */
> +
>  	intel_pstate_debug_expose_params();
>  	intel_pstate_sysfs_expose_params();
> +
>  	return rc;
>  out:
>  	get_online_cpus();
> diff --git a/include/trace/events/power.h b/include/trace/events/power.h
> index cda100d..9e9475c 100644
> --- a/include/trace/events/power.h
> +++ b/include/trace/events/power.h
> @@ -35,6 +35,59 @@ DEFINE_EVENT(cpu, cpu_idle,
>  	TP_ARGS(state, cpu_id)
>  );
>
> +TRACE_EVENT(pstate_sample,
> +
> +	TP_PROTO(u32 core_busy,
> +		u32 scaled_busy,
> +		u32 state,
> +		u64 mperf,
> +		u64 aperf,
> +		u32 energy,
> +		u32 freq
> +		),
> +
> +	TP_ARGS(core_busy,
> +		scaled_busy,
> +		state,
> +		mperf,
> +		aperf,
> +		energy,
> +		freq
> +		),
> +
> +	TP_STRUCT__entry(
> +		__field(u32, core_busy)
> +		__field(u32, scaled_busy)
> +		__field(u32, state)
> +		__field(u64, mperf)
> +		__field(u64, aperf)
> +		__field(u32, energy)
> +		__field(u32, freq)
> +
> +		),
> +
> +	TP_fast_assign(
> +		__entry->core_busy = core_busy;
> +		__entry->scaled_busy = scaled_busy;
> +		__entry->state = state;
> +		__entry->mperf = mperf;
> +		__entry->aperf = aperf;
> +		__entry->energy = energy;
> +		__entry->freq = freq;
> +		),
> +
> +	TP_printk("core_busy=%lu scaled=%lu state=%lu mperf=%llu aperf=%llu energy=%lu freq=%lu ",
> +		(unsigned long)__entry->core_busy,
> +		(unsigned long)__entry->scaled_busy,
> +		(unsigned long)__entry->state,
> +		(unsigned long long)__entry->mperf,
> +		(unsigned long long)__entry->aperf,
> +		(unsigned long)__entry->energy,
> +		(unsigned long)__entry->freq
> +		)
> +
> +);
> +
>  /* This file can get included multiple times, TRACE_HEADER_MULTI_READ at top */
>  #ifndef _PWR_EVENT_AVOID_DOUBLE_DEFINING
>  #define _PWR_EVENT_AVOID_DOUBLE_DEFINING
>
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index fe91dad..7e257b2 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -51,6 +51,8 @@ static inline int32_t div_fp(int32_t x, int32_t y)
 	return div_s64((int64_t)x << FRAC_BITS, (int64_t)y);
 }
 
+static u64 energy_divisor;
+
 struct sample {
 	int32_t core_pct_busy;
 	u64 aperf;
@@ -559,6 +561,7 @@ static inline void intel_pstate_sample(struct cpudata *cpu)
 
 	rdmsrl(MSR_IA32_APERF, aperf);
 	rdmsrl(MSR_IA32_MPERF, mperf);
+
 	cpu->sample_ptr = (cpu->sample_ptr + 1) % SAMPLE_COUNT;
 	cpu->samples[cpu->sample_ptr].aperf = aperf;
 	cpu->samples[cpu->sample_ptr].mperf = mperf;
@@ -603,6 +606,7 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
 	ctl = pid_calc(pid, busy_scaled);
 
 	steps = abs(ctl);
+
 	if (ctl < 0)
 		intel_pstate_pstate_increase(cpu, steps);
 	else
@@ -612,9 +616,24 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
 static void intel_pstate_timer_func(unsigned long __data)
 {
 	struct cpudata *cpu = (struct cpudata *) __data;
+	struct sample *sample;
+	u64 energy;
 
 	intel_pstate_sample(cpu);
+
+	sample = &cpu->samples[cpu->sample_ptr];
+	rdmsrl(MSR_PKG_ENERGY_STATUS, energy);
+
 	intel_pstate_adjust_busy_pstate(cpu);
+
+	trace_pstate_sample(fp_toint(sample->core_pct_busy),
+			fp_toint(intel_pstate_get_scaled_busy(cpu)),
+			cpu->pstate.current_pstate,
+			sample->mperf,
+			sample->aperf,
+			div64_u64(energy, energy_divisor),
+			sample->freq);
+
 	intel_pstate_set_sample_time(cpu);
 }
 
@@ -894,6 +913,7 @@ static int __init intel_pstate_init(void)
 	int cpu, rc = 0;
 	const struct x86_cpu_id *id;
 	struct cpu_defaults *cpu_info;
+	u64 units;
 
 	if (no_load)
 		return -ENODEV;
@@ -927,8 +947,12 @@ static int __init intel_pstate_init(void)
 	if (rc)
 		goto out;
 
+	rdmsrl(MSR_RAPL_POWER_UNIT, units);
+	energy_divisor = 1 << ((units >> 8) & 0x1f); /* bits{12:8} */
+
 	intel_pstate_debug_expose_params();
 	intel_pstate_sysfs_expose_params();
+
 	return rc;
 out:
 	get_online_cpus();
diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index cda100d..9e9475c 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -35,6 +35,59 @@ DEFINE_EVENT(cpu, cpu_idle,
 	TP_ARGS(state, cpu_id)
 );
 
+TRACE_EVENT(pstate_sample,
+
+	TP_PROTO(u32 core_busy,
+		u32 scaled_busy,
+		u32 state,
+		u64 mperf,
+		u64 aperf,
+		u32 energy,
+		u32 freq
+		),
+
+	TP_ARGS(core_busy,
+		scaled_busy,
+		state,
+		mperf,
+		aperf,
+		energy,
+		freq
+		),
+
+	TP_STRUCT__entry(
+		__field(u32, core_busy)
+		__field(u32, scaled_busy)
+		__field(u32, state)
+		__field(u64, mperf)
+		__field(u64, aperf)
+		__field(u32, energy)
+		__field(u32, freq)
+
+		),
+
+	TP_fast_assign(
+		__entry->core_busy = core_busy;
+		__entry->scaled_busy = scaled_busy;
+		__entry->state = state;
+		__entry->mperf = mperf;
+		__entry->aperf = aperf;
+		__entry->energy = energy;
+		__entry->freq = freq;
+		),
+
+	TP_printk("core_busy=%lu scaled=%lu state=%lu mperf=%llu aperf=%llu energy=%lu freq=%lu ",
+		(unsigned long)__entry->core_busy,
+		(unsigned long)__entry->scaled_busy,
+		(unsigned long)__entry->state,
+		(unsigned long long)__entry->mperf,
+		(unsigned long long)__entry->aperf,
+		(unsigned long)__entry->energy,
+		(unsigned long)__entry->freq
+		)
+
+);
+
 /* This file can get included multiple times, TRACE_HEADER_MULTI_READ at top */
 #ifndef _PWR_EVENT_AVOID_DOUBLE_DEFINING
 #define _PWR_EVENT_AVOID_DOUBLE_DEFINING