[1/6] cpufreq: Add sampling window for ondemand governor

Message ID 1293085026-18173-2-git-send-email-youquan.song@intel.com (mailing list archive)
State New, archived

Commit Message

Youquan Song Dec. 23, 2010, 6:17 a.m. UTC

Patch

diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index c631f27..e49b2e1 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -22,6 +22,7 @@ 
 #include <linux/tick.h>
 #include <linux/ktime.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 
 /*
  * dbs is used in this file as a shortform for demandbased switching
@@ -37,6 +38,14 @@ 
 #define MICRO_FREQUENCY_MIN_SAMPLE_RATE		(10000)
 #define MIN_FREQUENCY_UP_THRESHOLD		(11)
 #define MAX_FREQUENCY_UP_THRESHOLD		(100)
+/* Default sampling window: 1 second, in microseconds */
+#define DEF_SAMPLING_WINDOW			(1000000)
+
+/* Max number of history records */
+#define MAX_LOAD_RECORD_NUM			(150)
+
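+/*
+ * Thresholds used in place of the governor defaults while the sampling
+ * window is active (applied in dbs_info_init())
+ */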
+#define SAMPLING_WINDOW_UP_THRESHOLD		(80)
+#define SAMPLING_WINDOW_DOWN_DIFFERENTIAL	(20)
 
 /*
  * The polling frequency of this governor depends on the capability of
@@ -73,6 +82,13 @@  struct cpufreq_governor cpufreq_gov_ondemand = {
 /* Sampling types */
 enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
 
+/* Sampling record */
+struct load_record {
+	unsigned long load_freq;	/* (wall_time - idle_time) * avg freq */
+	unsigned int wall_time;		/* length of this sample */
+	unsigned int idle_time;		/* idle time within this sample */
+};
+
 struct cpu_dbs_info_s {
 	cputime64_t prev_cpu_idle;
 	cputime64_t prev_cpu_iowait;
@@ -81,6 +97,13 @@  struct cpu_dbs_info_s {
 	struct cpufreq_policy *cur_policy;
 	struct delayed_work work;
 	struct cpufreq_frequency_table *freq_table;
+	struct load_record *lr;		/* Load history record */
+	unsigned long total_load;	/* Sum of load in sampling window */
+	unsigned int total_wtime;	/* Sum of time in sampling window */
+	unsigned int total_itime;	/* Sum of idle time in sampling window*/
+	unsigned int start_p;		/* Start position of sampling window */
+	unsigned int cur_p;		/* Current position of sampling window*/
+	unsigned int cur_sw;		/* Current sampling window size */
 	unsigned int freq_lo;
 	unsigned int freq_lo_jiffies;
 	unsigned int freq_hi_jiffies;
@@ -97,6 +120,7 @@  struct cpu_dbs_info_s {
 static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info);
 
 static unsigned int dbs_enable;	/* number of CPUs using this policy */
+static unsigned int sampling_window_enable; /* only used in HW_ALL mode */
 
 /*
  * dbs_mutex protects data in dbs_tuners_ins from concurrent changes on
@@ -114,12 +138,16 @@  static struct dbs_tuners {
 	unsigned int sampling_down_factor;
 	unsigned int powersave_bias;
 	unsigned int io_is_busy;
+	unsigned int sampling_window;
+	unsigned int window_is_dynamic;
 } dbs_tuners_ins = {
 	.up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
 	.sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR,
 	.down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL,
 	.ignore_nice = 0,
 	.powersave_bias = 0,
+	.sampling_window = DEF_SAMPLING_WINDOW,
+	.window_is_dynamic = 1,
 };
 
 static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
@@ -501,9 +529,79 @@  static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq)
 			CPUFREQ_RELATION_L : CPUFREQ_RELATION_H);
 }
 
+/*
+ * Dynamically resize the sampling window according to how busy the
+ * workload is: the more idle the CPU, the smaller the window, scaled
+ * in proportion to the base sampling window.
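+ *
+ * Example with the defaults above: a 30% busy rate over the current
+ * window scales the 1 s base window down to 300 ms, never below the
+ * sampling rate.
+ */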
+static unsigned int get_dynamic_sampling_window(struct cpu_dbs_info_s *dbs)
+{
+	unsigned int sampling_window = 0;
+	unsigned int busy_rate = 0;
+
+	if (dbs_tuners_ins.window_is_dynamic) {
+		busy_rate = (dbs->total_wtime - dbs->total_itime)
+			* 100 / dbs->total_wtime;
+
+		sampling_window = (dbs_tuners_ins.sampling_window * busy_rate)
+			/ 100;
+
+		if (sampling_window < dbs_tuners_ins.sampling_rate)
+			sampling_window = dbs_tuners_ins.sampling_rate;
+	} else {
+		sampling_window = dbs_tuners_ins.sampling_window;
+	}
+
+	return sampling_window;
+}
+
+/* Get the average load during one sampling window */
+static unsigned long get_load_freq_during_sampling_window(
+		struct cpu_dbs_info_s *this_dbs_info, unsigned long load_freq,
+		unsigned int wall_time, unsigned int idle_time)
+{
+	unsigned int cur_p = 0, start_p = 0;
+
+	cur_p = this_dbs_info->cur_p;
+	start_p = this_dbs_info->start_p;
+	/* Record current sampling result */
+	this_dbs_info->lr[cur_p].load_freq = load_freq;
+	this_dbs_info->lr[cur_p].wall_time = wall_time;
+	this_dbs_info->lr[cur_p].idle_time = idle_time;
+	/* Accumulate this record into the sampling window totals */
+	this_dbs_info->total_load += load_freq;
+	this_dbs_info->total_wtime += wall_time;
+	this_dbs_info->total_itime += idle_time;
+	this_dbs_info->cur_p = (cur_p + 1) % MAX_LOAD_RECORD_NUM;
+
+	/* Dynamically size the sampling window if window_is_dynamic is set */
+	this_dbs_info->cur_sw = get_dynamic_sampling_window(this_dbs_info);
+
+	/* Drop records that have aged out of the latest sampling window */
+	while (this_dbs_info->total_wtime - this_dbs_info->lr[start_p].wall_time
+			 > this_dbs_info->cur_sw) {
+
+		this_dbs_info->total_wtime -=
+				this_dbs_info->lr[start_p].wall_time;
+		this_dbs_info->total_itime -=
+				this_dbs_info->lr[start_p].idle_time;
+		this_dbs_info->total_load -=
+				this_dbs_info->lr[start_p].load_freq;
+		start_p = (start_p + 1) % MAX_LOAD_RECORD_NUM;
+		this_dbs_info->start_p = start_p;
+	}
+
+	/* Get the average load in the latest sampling window */
+	load_freq = this_dbs_info->total_load / this_dbs_info->total_wtime;
+
+	load_freq *= 100;
+	return load_freq;
+}
+
 static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
 {
-	unsigned int max_load_freq;
+	unsigned long max_load_freq;
+	unsigned int max_wall_time;
+	unsigned int max_idle_time;
 
 	struct cpufreq_policy *policy;
 	unsigned int j;
@@ -525,12 +623,14 @@  static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
 
 	/* Get Absolute Load - in terms of freq */
 	max_load_freq = 0;
+	max_wall_time = 0;
+	max_idle_time = 0;
 
 	for_each_cpu(j, policy->cpus) {
 		struct cpu_dbs_info_s *j_dbs_info;
 		cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time;
+		unsigned long load_freq, load;
 		unsigned int idle_time, wall_time, iowait_time;
-		unsigned int load, load_freq;
 		int freq_avg;
 
 		j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
@@ -580,17 +680,28 @@  static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
 		if (unlikely(!wall_time || wall_time < idle_time))
 			continue;
 
-		load = 100 * (wall_time - idle_time) / wall_time;
+		load = wall_time - idle_time;
 
 		freq_avg = __cpufreq_driver_getavg(policy, j);
 		if (freq_avg <= 0)
 			freq_avg = policy->cur;
 
 		load_freq = load * freq_avg;
-		if (load_freq > max_load_freq)
+		if (load_freq > max_load_freq) {
 			max_load_freq = load_freq;
+			max_wall_time = wall_time;
+			max_idle_time = idle_time;
+		}
 	}
 
+	if (sampling_window_enable) {
+		/* Get the average load in the latest sampling window */
+		max_load_freq = get_load_freq_during_sampling_window(
+				this_dbs_info, max_load_freq,
+				max_wall_time, max_idle_time);
+	} else if (max_wall_time) {
+		/* Guard against every CPU having been skipped above */
+		max_load_freq = (100 * max_load_freq) / max_wall_time;
+	}
+
 	/* Check for frequency increase */
 	if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) {
 		/* If switching to max speed, apply sampling_down_factor */
@@ -713,6 +824,54 @@  static int should_io_be_busy(void)
 	return 0;
 }
 
+/* Initialize dbs_info struct */
+static int dbs_info_init(struct cpu_dbs_info_s *this_dbs_info,
+		struct cpufreq_policy *policy, unsigned int cpu)
+{
+	this_dbs_info->cpu = cpu;
+	this_dbs_info->rate_mult = 1;
+	/* The sampling window is only used with HW_ALL coordination */
+	if (cpumask_weight(policy->cpus) > 1)
+		return 0;
+
+	this_dbs_info->start_p = 0;
+	this_dbs_info->cur_p = 1;
+	this_dbs_info->total_wtime = 0;
+	this_dbs_info->total_itime = 0;
+	this_dbs_info->total_load = 0;
+	/* Allocate the load record buffer */
+	this_dbs_info->lr = kmalloc(sizeof(struct load_record) *
+			(MAX_LOAD_RECORD_NUM), GFP_KERNEL);
+	if (!this_dbs_info->lr) {
+		printk(KERN_ERR "Malloc DBS load record failed\n");
+		return -ENOMEM;
+	}
+
+	/* Slot 0 is a zeroed sentinel; real samples start at index 1 */
+	this_dbs_info->lr[0].load_freq = 0;
+	this_dbs_info->lr[0].wall_time = 0;
+	this_dbs_info->lr[0].idle_time = 0;
+	sampling_window_enable = 1;
+	dbs_tuners_ins.up_threshold = SAMPLING_WINDOW_UP_THRESHOLD;
+	dbs_tuners_ins.down_differential = SAMPLING_WINDOW_DOWN_DIFFERENTIAL;
+	return 0;
+}
+
+/* Free the per-CPU load record buffers */
+static void destroy_dbs_info(void)
+{
+	struct cpu_dbs_info_s *dbs_info = NULL;
+	int i;
+
+	if (!sampling_window_enable)
+		return;
+
+	for_each_online_cpu(i) {
+		dbs_info = &per_cpu(od_cpu_dbs_info, i);
+		kfree(dbs_info->lr);
+	}
+}
+
 static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 				   unsigned int event)
 {
@@ -749,8 +908,13 @@  static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 						kstat_cpu(j).cpustat.nice;
 			}
 		}
-		this_dbs_info->cpu = cpu;
-		this_dbs_info->rate_mult = 1;
+
+		rc = dbs_info_init(this_dbs_info, policy, cpu);
+		if (rc) {
+			mutex_unlock(&dbs_mutex);
+			return rc;
+		}
+
 		ondemand_powersave_bias_init_cpu(cpu);
 		/*
 		 * Start the timerschedule work, when this governor
@@ -854,6 +1018,7 @@  static void __exit cpufreq_gov_dbs_exit(void)
 {
 	cpufreq_unregister_governor(&cpufreq_gov_ondemand);
 	destroy_workqueue(kondemand_wq);
+	destroy_dbs_info();
 }
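
For reference, below is a minimal userspace sketch of the windowing arithmetic
the patch adds: a circular buffer of per-sample load records, a window sized in
proportion to the busy rate of the window just observed, and trimming of
records that have aged out. The constants mirror the patch; the sample data,
the MIN_SAMPLING_RATE stand-in for dbs_tuners_ins.sampling_rate, and the
window/window_load/dynamic_window names are illustrative only, not part of the
patch. A 64-bit unsigned long is assumed for the demo figures.

#include <stdio.h>

#define MAX_LOAD_RECORD_NUM	150
#define DEF_SAMPLING_WINDOW	1000000	/* 1 s base window, in us */
#define MIN_SAMPLING_RATE	10000	/* stand-in for the sampling_rate tuner */

struct load_record {
	unsigned long load_freq;	/* (wall - idle) * avg frequency */
	unsigned int wall_time;		/* sample length, us */
	unsigned int idle_time;		/* idle time within the sample, us */
};

struct window {
	struct load_record lr[MAX_LOAD_RECORD_NUM];
	unsigned long total_load;	/* sums over the current window */
	unsigned int total_wtime;
	unsigned int total_itime;
	unsigned int start_p;		/* oldest record */
	unsigned int cur_p;		/* next write position */
	unsigned int cur_sw;		/* current window size, us */
};

/* Scale the base window by the busy rate of the window just observed. */
static unsigned int dynamic_window(const struct window *w)
{
	unsigned int busy = (w->total_wtime - w->total_itime) * 100
				/ w->total_wtime;
	unsigned int sw = DEF_SAMPLING_WINDOW * busy / 100;

	return sw < MIN_SAMPLING_RATE ? MIN_SAMPLING_RATE : sw;
}

/* Record one sample, expire old records, return average load_freq * 100. */
static unsigned long window_load(struct window *w, unsigned long load_freq,
				 unsigned int wall, unsigned int idle)
{
	unsigned int s = w->start_p;

	w->lr[w->cur_p] = (struct load_record){ load_freq, wall, idle };
	w->total_load += load_freq;
	w->total_wtime += wall;
	w->total_itime += idle;
	w->cur_p = (w->cur_p + 1) % MAX_LOAD_RECORD_NUM;

	w->cur_sw = dynamic_window(w);
	while (w->total_wtime - w->lr[s].wall_time > w->cur_sw) {
		w->total_wtime -= w->lr[s].wall_time;
		w->total_itime -= w->lr[s].idle_time;
		w->total_load -= w->lr[s].load_freq;
		s = (s + 1) % MAX_LOAD_RECORD_NUM;
		w->start_p = s;
	}
	return w->total_load / w->total_wtime * 100;
}

int main(void)
{
	struct window w = { .cur_p = 1 };	/* slot 0 is the zeroed sentinel */
	unsigned int i;

	/* Hypothetical samples: 10 ms ticks, 30% busy, 2 GHz in kHz */
	for (i = 0; i < 40; i++) {
		unsigned int wall = 10000, idle = 7000;
		unsigned long lf = (unsigned long)(wall - idle) * 2000000;
		unsigned long avg = window_load(&w, lf, wall, idle);

		printf("sample %2u: window %6u us, avg load_freq %lu\n",
		       i, w.cur_sw, avg);
	}
	return 0;
}

Note that, as in the patch, total_load / total_wtime is computed before the
multiplication by 100: this keeps the intermediate value small at the cost of
truncating fractional precision in the average.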