diff mbox

Problem: Possible regression in intel_pstate on 3.12

Message ID 52AF3833.1070204@gmail.com (mailing list archive)
State Not Applicable, archived
Headers show

Commit Message

dirk.brandewie@gmail.com Dec. 16, 2013, 5:28 p.m. UTC
Hi Joakim,

Apply the following patch to your v3.12 kernel, collect some data with the
command below, and send me the resulting perf.data file:
     perf record -a -c 1 -e power:pstate_sample sleep 10


TIA
--Dirk

commit b3dc2c2a106cea68e4c9c0f4747b15291113c4ae
Author: Dirk Brandewie <dirk.j.brandewie@intel.com>
Date:   Mon Dec 2 09:56:46 2013 -0800

     intel_pstate: Add trace point to report internal state.

     Add perf trace event "power:pstate_sample" to report driver state to
     aid in diagnosing issues reported against intel_pstate.

     Signed-off-by: Dirk Brandewie <dirk.j.brandewie@intel.com>
---
  drivers/cpufreq/intel_pstate.c | 22 ++++++++++++++++++
  include/trace/events/power.h   | 53 ++++++++++++++++++++++++++++++++++++++++++
  2 files changed, 75 insertions(+)

Comments

Joakim Hernberg Jan. 12, 2014, 1:43 a.m. UTC | #1
Hello Dirk,

I don't seem to be able to apply this patch.  Which kernel is it
meant for, and is it complete?

Best wishes,

On Mon, 16 Dec 2013 09:28:19 -0800
Dirk Brandewie <dirk.brandewie@gmail.com> wrote:

> Hi Joakim,
> 
> Add the following patch to your v3.12 kernel and collect some data
> with the command and send the resulting perf.data file:
>      perf record -a -c 1 -e power:pstate_sample sleep 10
> 
> 
> TIA
> --Dirk
> 
> commit b3dc2c2a106cea68e4c9c0f4747b15291113c4ae
> Author: Dirk Brandewie <dirk.j.brandewie@intel.com>
> Date:   Mon Dec 2 09:56:46 2013 -0800
> 
>      intel_pstate: Add trace point to report internal state.
> 
>      Add perf trace event "power:pstate_sample" to report driver state to
>      aid in diagnosing issues reported against intel_pstate.
> 
>      Signed-off-by: Dirk Brandewie <dirk.j.brandewie@intel.com>
> ---
>   drivers/cpufreq/intel_pstate.c | 22 ++++++++++++++++++
>   include/trace/events/power.h   | 53 ++++++++++++++++++++++++++++++++++++++++++
>   2 files changed, 75 insertions(+)
> 
> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
> index 5f1cbae..c4f14d1 100644
> --- a/drivers/cpufreq/intel_pstate.c
> +++ b/drivers/cpufreq/intel_pstate.c
> @@ -50,6 +50,8 @@ static inline int32_t div_fp(int32_t x, int32_t y)
>   	return div_s64((int64_t)x << FRAC_BITS, (int64_t)y);
>   }
> 
> +static u64 energy_divisor;
> +
>   struct sample {
>   	int32_t core_pct_busy;
>   	u64 aperf;
> @@ -512,6 +514,7 @@ static inline void intel_pstate_sample(struct cpudata *cpu)
> 
>   	rdmsrl(MSR_IA32_APERF, aperf);
>   	rdmsrl(MSR_IA32_MPERF, mperf);
> +
>   	cpu->sample_ptr = (cpu->sample_ptr + 1) % SAMPLE_COUNT;
>   	cpu->samples[cpu->sample_ptr].aperf = aperf;
>   	cpu->samples[cpu->sample_ptr].mperf = mperf;
> @@ -565,10 +568,24 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
>   static void intel_pstate_timer_func(unsigned long __data)
>   {
>   	struct cpudata *cpu = (struct cpudata *) __data;
> +	struct sample *sample;
> +	u64 energy;
> 
>   	intel_pstate_sample(cpu);
> +
> +	sample = &cpu->samples[cpu->sample_ptr];
> +	rdmsrl(MSR_PKG_ENERGY_STATUS, energy);
> +
>   	intel_pstate_adjust_busy_pstate(cpu);
> 
> +	trace_pstate_sample(fp_toint(sample->core_pct_busy),
> +			fp_toint(intel_pstate_get_scaled_busy(cpu)),
> +			cpu->pstate.current_pstate,
> +			sample->mperf,
> +			sample->aperf,
> +			energy/energy_divisor,
> +			sample->freq);
> +
>   	if (cpu->pstate.current_pstate == cpu->pstate.min_pstate) {
>   		cpu->min_pstate_count++;
>   		if (!(cpu->min_pstate_count % 5)) {
> @@ -849,6 +866,7 @@ static int __init intel_pstate_init(void)
>   	int cpu, rc = 0;
>   	const struct x86_cpu_id *id;
>   	struct cpu_defaults *cpu_info;
> +	u64 units;
> 
>   	if (no_load)
>   		return -ENODEV;
> @@ -882,8 +900,12 @@ static int __init intel_pstate_init(void)
>   	if (rc)
>   		goto out;
> 
> +	rdmsrl(MSR_RAPL_POWER_UNIT, units);
> +	energy_divisor = 1 << ((units >> 8) & 0x1f); /* bits{12:8} */
> +
>   	intel_pstate_debug_expose_params();
>   	intel_pstate_sysfs_expose_params();
> +
>   	return rc;
>   out:
>   	get_online_cpus();
> diff --git a/include/trace/events/power.h b/include/trace/events/power.h
> index cda100d..9e9475c 100644
> --- a/include/trace/events/power.h
> +++ b/include/trace/events/power.h
> @@ -35,6 +35,59 @@ DEFINE_EVENT(cpu, cpu_idle,
>   	TP_ARGS(state, cpu_id)
>   );
> 
> +TRACE_EVENT(pstate_sample,
> +
> +	TP_PROTO(u32 core_busy,
> +		u32 scaled_busy,
> +		u32 state,
> +		u64 mperf,
> +		u64 aperf,
> +		u32 energy,
> +		u32 freq
> +		),
> +
> +	TP_ARGS(core_busy,
> +		scaled_busy,
> +		state,
> +		mperf,
> +		aperf,
> +		energy,
> +		freq
> +		),
> +
> +	TP_STRUCT__entry(
> +		__field(u32, core_busy)
> +		__field(u32, scaled_busy)
> +		__field(u32, state)
> +		__field(u64, mperf)
> +		__field(u64, aperf)
> +		__field(u32, energy)
> +		__field(u32, freq)
> +
> +	),
> +
diff mbox

Patch

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 5f1cbae..c4f14d1 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -50,6 +50,8 @@  static inline int32_t div_fp(int32_t x, int32_t y)
  	return div_s64((int64_t)x << FRAC_BITS, (int64_t)y);
  }

+static u64 energy_divisor;
+
  struct sample {
  	int32_t core_pct_busy;
  	u64 aperf;
@@ -512,6 +514,7 @@  static inline void intel_pstate_sample(struct cpudata *cpu)

  	rdmsrl(MSR_IA32_APERF, aperf);
  	rdmsrl(MSR_IA32_MPERF, mperf);
+
  	cpu->sample_ptr = (cpu->sample_ptr + 1) % SAMPLE_COUNT;
  	cpu->samples[cpu->sample_ptr].aperf = aperf;
  	cpu->samples[cpu->sample_ptr].mperf = mperf;
@@ -565,10 +568,24 @@  static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
  static void intel_pstate_timer_func(unsigned long __data)
  {
  	struct cpudata *cpu = (struct cpudata *) __data;
+	struct sample *sample;
+	u64 energy;

  	intel_pstate_sample(cpu);
+
+	sample = &cpu->samples[cpu->sample_ptr];
+	rdmsrl(MSR_PKG_ENERGY_STATUS, energy);
+
  	intel_pstate_adjust_busy_pstate(cpu);

+	trace_pstate_sample(fp_toint(sample->core_pct_busy),
+			fp_toint(intel_pstate_get_scaled_busy(cpu)),
+			cpu->pstate.current_pstate,
+			sample->mperf,
+			sample->aperf,
+			energy/energy_divisor,
+			sample->freq);
+
  	if (cpu->pstate.current_pstate == cpu->pstate.min_pstate) {
  		cpu->min_pstate_count++;
  		if (!(cpu->min_pstate_count % 5)) {
@@ -849,6 +866,7 @@  static int __init intel_pstate_init(void)
  	int cpu, rc = 0;
  	const struct x86_cpu_id *id;
  	struct cpu_defaults *cpu_info;
+	u64 units;

  	if (no_load)
  		return -ENODEV;
@@ -882,8 +900,12 @@  static int __init intel_pstate_init(void)
  	if (rc)
  		goto out;

+	rdmsrl(MSR_RAPL_POWER_UNIT, units);
+	energy_divisor = 1 << ((units >> 8) & 0x1f); /* bits{12:8} */
+
  	intel_pstate_debug_expose_params();
  	intel_pstate_sysfs_expose_params();
+
  	return rc;
  out:
  	get_online_cpus();
diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index cda100d..9e9475c 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -35,6 +35,59 @@  DEFINE_EVENT(cpu, cpu_idle,
  	TP_ARGS(state, cpu_id)
  );

+TRACE_EVENT(pstate_sample,
+
+	TP_PROTO(u32 core_busy,
+		u32 scaled_busy,
+		u32 state,
+		u64 mperf,
+		u64 aperf,
+		u32 energy,
+		u32 freq
+		),
+
+	TP_ARGS(core_busy,
+		scaled_busy,
+		state,
+		mperf,
+		aperf,
+		energy,
+		freq
+		),
+
+	TP_STRUCT__entry(
+		__field(u32, core_busy)
+		__field(u32, scaled_busy)
+		__field(u32, state)
+		__field(u64, mperf)
+		__field(u64, aperf)
+		__field(u32, energy)
+		__field(u32, freq)
+
+	),
+
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html