From patchwork Tue Jul  7 18:24:06 2015
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Morten Rasmussen <morten.rasmussen@arm.com>
X-Patchwork-Id: 6738421
Return-Path: <linux-pm-owner@kernel.org>
X-Original-To: patchwork-linux-pm@patchwork.kernel.org
Delivered-To: patchwork-parsemail@patchwork1.web.kernel.org
Received: from mail.kernel.org (mail.kernel.org [198.145.29.136])
	by patchwork1.web.kernel.org (Postfix) with ESMTP id 09C8B9F2F0
	for <patchwork-linux-pm@patchwork.kernel.org>;
	Tue,  7 Jul 2015 19:00:15 +0000 (UTC)
Received: from mail.kernel.org (localhost [127.0.0.1])
	by mail.kernel.org (Postfix) with ESMTP id 67CA720719
	for <patchwork-linux-pm@patchwork.kernel.org>;
	Tue,  7 Jul 2015 19:00:10 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67])
	by mail.kernel.org (Postfix) with ESMTP id 03917206EC
	for <patchwork-linux-pm@patchwork.kernel.org>;
	Tue,  7 Jul 2015 19:00:09 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S934677AbbGGTAF (ORCPT
	<rfc822;patchwork-linux-pm@patchwork.kernel.org>);
	Tue, 7 Jul 2015 15:00:05 -0400
Received: from foss.arm.com ([217.140.101.70]:37541 "EHLO foss.arm.com"
	rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP
	id S932864AbbGGSXA (ORCPT <rfc822;linux-pm@vger.kernel.org>);
	Tue, 7 Jul 2015 14:23:00 -0400
Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.72.51.249])
	by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id F2B595E4;
	Tue,  7 Jul 2015 11:23:25 -0700 (PDT)
Received: from e105550-lin.cambridge.arm.com (e105550-lin.cambridge.arm.com
	[10.2.131.193])
	by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPA id
	E00B33F23A; Tue,  7 Jul 2015 11:22:56 -0700 (PDT)
From: Morten Rasmussen <morten.rasmussen@arm.com>
To: peterz@infradead.org, mingo@redhat.com
Cc: vincent.guittot@linaro.org, daniel.lezcano@linaro.org,
	Dietmar Eggemann <Dietmar.Eggemann@arm.com>,
	yuyang.du@intel.com, mturquette@baylibre.com, rjw@rjwysocki.net,
	Juri Lelli <Juri.Lelli@arm.com>, sgurrappadi@nvidia.com,
	pang.xunlei@zte.com.cn, linux-kernel@vger.kernel.org,
	linux-pm@vger.kernel.org
Subject: [RFCv5 PATCH 23/46] sched: Extend sched_group_energy to test
	load-balancing decisions
Date: Tue,  7 Jul 2015 19:24:06 +0100
Message-Id: <1436293469-25707-24-git-send-email-morten.rasmussen@arm.com>
X-Mailer: git-send-email 1.9.1
In-Reply-To: <1436293469-25707-1-git-send-email-morten.rasmussen@arm.com>
References: <1436293469-25707-1-git-send-email-morten.rasmussen@arm.com>
Sender: linux-pm-owner@vger.kernel.org
Precedence: bulk
List-ID: <linux-pm.vger.kernel.org>
X-Mailing-List: linux-pm@vger.kernel.org
X-Spam-Status: No, score=-7.7 required=5.0 tests=BAYES_00, RCVD_IN_DNSWL_HI,
	RP_MATCHES_RCVD,
	UNPARSEABLE_RELAY autolearn=unavailable version=3.3.1
X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on mail.kernel.org
X-Virus-Scanned: ClamAV using ClamSMTP

Extended sched_group_energy() to support energy prediction with usage
(tasks) added/removed from a specific cpu or migrated between a pair of
cpus. Useful for load-balancing decision making.

cc: Ingo Molnar <mingo@redhat.com>
cc: Peter Zijlstra <peterz@infradead.org>

Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com>
---
 kernel/sched/fair.c | 86 +++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 60 insertions(+), 26 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index bd0be9d..362e33b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4825,15 +4825,17 @@ static unsigned long capacity_curr_of(int cpu)
  * capacity_curr (but not capacity_orig) as it useful for predicting the
  * capacity required after task migrations (scheduler-driven DVFS).
  */
-
-static unsigned long get_cpu_usage(int cpu)
+static unsigned long __get_cpu_usage(int cpu, int delta)
 {
 	int sum;
 	unsigned long usage = cpu_rq(cpu)->cfs.utilization_load_avg;
 	unsigned long blocked = cpu_rq(cpu)->cfs.utilization_blocked_avg;
 	unsigned long capacity_orig = capacity_orig_of(cpu);
 
-	sum = usage + blocked;
+	sum = usage + blocked + delta;
+
+	if (sum < 0)
+		return 0;
 
 	if (sum >= capacity_orig)
 		return capacity_orig;
@@ -4841,13 +4843,28 @@ static unsigned long get_cpu_usage(int cpu)
 	return sum;
 }
 
+static unsigned long get_cpu_usage(int cpu)
+{
+	return __get_cpu_usage(cpu, 0);
+}
+
 static inline bool energy_aware(void)
 {
 	return sched_feat(ENERGY_AWARE);
 }
 
+struct energy_env {
+	struct sched_group	*sg_top;
+	struct sched_group	*sg_cap;
+	int			cap_idx;
+	int			usage_delta;
+	int			src_cpu;
+	int			dst_cpu;
+	int			energy;
+};
+
 /*
- * cpu_norm_usage() returns the cpu usage relative to a specific capacity,
+ * __cpu_norm_usage() returns the cpu usage relative to a specific capacity,
  * i.e. it's busy ratio, in the range [0..SCHED_LOAD_SCALE] which is useful for
  * energy calculations. Using the scale-invariant usage returned by
  * get_cpu_usage() and approximating scale-invariant usage by:
@@ -4860,9 +4877,9 @@ static inline bool energy_aware(void)
  *
  *   norm_usage = running_time/time ~ usage/capacity
  */
-static unsigned long cpu_norm_usage(int cpu, unsigned long capacity)
+static unsigned long __cpu_norm_usage(int cpu, unsigned long capacity, int delta)
 {
-	int usage = __get_cpu_usage(cpu);
+	int usage = __get_cpu_usage(cpu, delta);
 
 	if (usage >= capacity)
 		return SCHED_CAPACITY_SCALE;
@@ -4870,13 +4887,25 @@ static unsigned long cpu_norm_usage(int cpu, unsigned long capacity)
 	return (usage << SCHED_CAPACITY_SHIFT)/capacity;
 }
 
-static unsigned long group_max_usage(struct sched_group *sg)
+static int calc_usage_delta(struct energy_env *eenv, int cpu)
 {
-	int i;
+	if (cpu == eenv->src_cpu)
+		return -eenv->usage_delta;
+	if (cpu == eenv->dst_cpu)
+		return eenv->usage_delta;
+	return 0;
+}
+
+static
+unsigned long group_max_usage(struct energy_env *eenv)
+{
+	int i, delta;
 	unsigned long max_usage = 0;
 
-	for_each_cpu(i, sched_group_cpus(sg))
-		max_usage = max(max_usage, get_cpu_usage(i));
+	for_each_cpu(i, sched_group_cpus(eenv->sg_cap)) {
+		delta = calc_usage_delta(eenv, i);
+		max_usage = max(max_usage, __get_cpu_usage(i, delta));
+	}
 
 	return max_usage;
 }
@@ -4890,31 +4919,36 @@ static unsigned long group_max_usage(struct sched_group *sg)
  * latter is used as the estimate as it leads to a more pessimistic energy
  * estimate (more busy).
  */
-static unsigned long group_norm_usage(struct sched_group *sg, int cap_idx)
+static unsigned
+long group_norm_usage(struct energy_env *eenv, struct sched_group *sg)
 {
-	int i;
+	int i, delta;
 	unsigned long usage_sum = 0;
-	unsigned long capacity = sg->sge->cap_states[cap_idx].cap;
+	unsigned long capacity = sg->sge->cap_states[eenv->cap_idx].cap;
 
-	for_each_cpu(i, sched_group_cpus(sg))
-		usage_sum += cpu_norm_usage(i, capacity);
+	for_each_cpu(i, sched_group_cpus(sg)) {
+		delta = calc_usage_delta(eenv, i);
+		usage_sum += __cpu_norm_usage(i, capacity, delta);
+	}
 
 	if (usage_sum > SCHED_CAPACITY_SCALE)
 		return SCHED_CAPACITY_SCALE;
 	return usage_sum;
 }
 
-static int find_new_capacity(struct sched_group *sg,
+static int find_new_capacity(struct energy_env *eenv,
 		struct sched_group_energy *sge)
 {
 	int idx;
-	unsigned long util = group_max_usage(sg);
+	unsigned long util = group_max_usage(eenv);
 
 	for (idx = 0; idx < sge->nr_cap_states; idx++) {
 		if (sge->cap_states[idx].cap >= util)
 			return idx;
 	}
 
+	eenv->cap_idx = idx;
+
 	return idx;
 }
 
@@ -4927,16 +4961,16 @@ static int find_new_capacity(struct sched_group *sg,
  * gather the same usage statistics multiple times. This can probably be done in
  * a faster but more complex way.
  */
-static unsigned int sched_group_energy(struct sched_group *sg_top)
+static unsigned int sched_group_energy(struct energy_env *eenv)
 {
 	struct sched_domain *sd;
 	int cpu, total_energy = 0;
 	struct cpumask visit_cpus;
 	struct sched_group *sg;
 
-	WARN_ON(!sg_top->sge);
+	WARN_ON(!eenv->sg_top->sge);
 
-	cpumask_copy(&visit_cpus, sched_group_cpus(sg_top));
+	cpumask_copy(&visit_cpus, sched_group_cpus(eenv->sg_top));
 
 	while (!cpumask_empty(&visit_cpus)) {
 		struct sched_group *sg_shared_cap = NULL;
@@ -4959,17 +4993,16 @@ static unsigned int sched_group_energy(struct sched_group *sg_top)
 				break;
 
 			do {
-				struct sched_group *sg_cap_util;
 				unsigned long group_util;
 				int sg_busy_energy, sg_idle_energy, cap_idx;
 
 				if (sg_shared_cap && sg_shared_cap->group_weight >= sg->group_weight)
-					sg_cap_util = sg_shared_cap;
+					eenv->sg_cap = sg_shared_cap;
 				else
-					sg_cap_util = sg;
+					eenv->sg_cap = sg;
 
-				cap_idx = find_new_capacity(sg_cap_util, sg->sge);
-				group_util = group_norm_usage(sg, cap_idx);
+				cap_idx = find_new_capacity(eenv, sg->sge);
+				group_util = group_norm_usage(eenv, sg);
 				sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power)
 										>> SCHED_CAPACITY_SHIFT;
 				sg_idle_energy = ((SCHED_LOAD_SCALE-group_util) * sg->sge->idle_states[0].power)
@@ -4980,7 +5013,7 @@ static unsigned int sched_group_energy(struct sched_group *sg_top)
 				if (!sd->child)
 					cpumask_xor(&visit_cpus, &visit_cpus, sched_group_cpus(sg));
 
-				if (cpumask_equal(sched_group_cpus(sg), sched_group_cpus(sg_top)))
+				if (cpumask_equal(sched_group_cpus(sg), sched_group_cpus(eenv->sg_top)))
 					goto next_cpu;
 
 			} while (sg = sg->next, sg != sd->groups);
@@ -4989,6 +5022,7 @@ static unsigned int sched_group_energy(struct sched_group *sg_top)
 		continue;
 	}
 
+	eenv->energy = total_energy;
 	return total_energy;
 }