
[4/4] KVM guest implementation

Message ID 20210831015919.13006-4-skyele@sjtu.edu.cn (mailing list archive)
State New, archived
Series [1/4] KVM: x86: Introduce .pcpu_is_idle() stub infrastructure

Commit Message

Tianqiang Xu Aug. 31, 2021, 1:59 a.m. UTC
The guest OS uses the 'is_idle' field of kvm_steal_time to know whether
a pCPU is idle and to decide whether to schedule a task onto a preempted
vCPU. If the pCPU is idle, scheduling a task onto it improves CPU
utilization. If not, avoiding that preempted vCPU saves a host/guest
switch and thus improves performance.

The guest OS invokes available_idle_cpu_sched() to read the 'is_idle'
field of kvm_steal_time.

Callers of available_idle_cpu() outside kernel/sched/fair.c are left
unchanged, because they rely on the semantics provided by the 'preempted'
field of kvm_steal_time.
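
For context, here is a minimal sketch of what the guest-side helper could
look like. It is not taken from this patch: the real helper and the
'is_idle' plumbing are introduced in the earlier patches of this series,
and the pv_pcpu_is_idle() accessor and the per-CPU steal_time variable
below are illustrative assumptions only.

#include <linux/sched.h>	/* idle_cpu(), vcpu_is_preempted() */
#include <linux/percpu.h>	/* per_cpu() */
#include <asm/kvm_para.h>	/* struct kvm_steal_time */

/* Hypothetical accessor for the host-written 'is_idle' hint. */
static bool pv_pcpu_is_idle(int cpu)
{
	struct kvm_steal_time *st = &per_cpu(steal_time, cpu);

	return READ_ONCE(st->is_idle);
}

int available_idle_cpu_sched(int cpu)
{
	if (!idle_cpu(cpu))
		return 0;

	/*
	 * Unlike available_idle_cpu(), a preempted vCPU is still treated
	 * as a usable target when the pCPU backing it is idle, since the
	 * host can run the vCPU immediately.
	 */
	if (vcpu_is_preempted(cpu) && !pv_pcpu_is_idle(cpu))
		return 0;

	return 1;
}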

--
Authors: Tianqiang Xu, Dingji Li, Zeyu Mi
	 Shanghai Jiao Tong University

Signed-off-by: Tianqiang Xu <skyele@sjtu.edu.cn>

---
 kernel/sched/fair.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

Comments

Peter Zijlstra Aug. 31, 2021, 7:21 a.m. UTC | #1
On Tue, Aug 31, 2021 at 09:59:19AM +0800, Tianqiang Xu wrote:
> The guest OS uses the 'is_idle' field of kvm_steal_time to know whether
> a pCPU is idle and to decide whether to schedule a task onto a preempted
> vCPU. If the pCPU is idle, scheduling a task onto it improves CPU
> utilization. If not, avoiding that preempted vCPU saves a host/guest
> switch and thus improves performance.
> 
> The guest OS invokes available_idle_cpu_sched() to read the 'is_idle'
> field of kvm_steal_time.
> 
> Callers of available_idle_cpu() outside kernel/sched/fair.c are left
> unchanged, because they rely on the semantics provided by the 'preempted'
> field of kvm_steal_time.

> ---
>  kernel/sched/fair.c | 24 ++++++++++++------------
>  1 file changed, 12 insertions(+), 12 deletions(-)

Goes and replaces every single available_idle_cpu() in fair with the new
function that doesn't consider vCPU preemption.

So what do you reckon now happens in the oversubscribed virt scenario
where each CPU has multiple vCPUs?

Patch

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 44c452072a1b..f69f0a8d2abe 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5871,13 +5871,13 @@  wake_affine_idle(int this_cpu, int prev_cpu, int sync)
 	 * a cpufreq perspective, it's better to have higher utilisation
 	 * on one CPU.
 	 */
-	if (available_idle_cpu(this_cpu) && cpus_share_cache(this_cpu, prev_cpu))
-		return available_idle_cpu(prev_cpu) ? prev_cpu : this_cpu;
+	if (available_idle_cpu_sched(this_cpu) && cpus_share_cache(this_cpu, prev_cpu))
+		return available_idle_cpu_sched(prev_cpu) ? prev_cpu : this_cpu;
 
 	if (sync && cpu_rq(this_cpu)->nr_running == 1)
 		return this_cpu;
 
-	if (available_idle_cpu(prev_cpu))
+	if (available_idle_cpu_sched(prev_cpu))
 		return prev_cpu;
 
 	return nr_cpumask_bits;
@@ -5976,7 +5976,7 @@  find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
 		if (sched_idle_cpu(i))
 			return i;
 
-		if (available_idle_cpu(i)) {
+		if (available_idle_cpu_sched(i)) {
 			struct cpuidle_state *idle = idle_get_state(rq);
 			if (idle && idle->exit_latency < min_exit_latency) {
 				/*
@@ -6064,7 +6064,7 @@  static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
 
 static inline int __select_idle_cpu(int cpu, struct task_struct *p)
 {
-	if ((available_idle_cpu(cpu) || sched_idle_cpu(cpu)) &&
+	if ((available_idle_cpu_sched(cpu) || sched_idle_cpu(cpu)) &&
 	    sched_cpu_cookie_match(cpu_rq(cpu), p))
 		return cpu;
 
@@ -6115,7 +6115,7 @@  void __update_idle_core(struct rq *rq)
 		if (cpu == core)
 			continue;
 
-		if (!available_idle_cpu(cpu))
+		if (!available_idle_cpu_sched(cpu))
 			goto unlock;
 	}
 
@@ -6138,7 +6138,7 @@  static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu
 		return __select_idle_cpu(core, p);
 
 	for_each_cpu(cpu, cpu_smt_mask(core)) {
-		if (!available_idle_cpu(cpu)) {
+		if (!available_idle_cpu_sched(cpu)) {
 			idle = false;
 			if (*idle_cpu == -1) {
 				if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, p->cpus_ptr)) {
@@ -6171,7 +6171,7 @@  static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int t
 		if (!cpumask_test_cpu(cpu, p->cpus_ptr) ||
 		    !cpumask_test_cpu(cpu, sched_domain_span(sd)))
 			continue;
-		if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
+		if (available_idle_cpu_sched(cpu) || sched_idle_cpu(cpu))
 			return cpu;
 	}
 
@@ -6302,7 +6302,7 @@  select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
 	for_each_cpu_wrap(cpu, cpus, target) {
 		unsigned long cpu_cap = capacity_of(cpu);
 
-		if (!available_idle_cpu(cpu) && !sched_idle_cpu(cpu))
+		if (!available_idle_cpu_sched(cpu) && !sched_idle_cpu(cpu))
 			continue;
 		if (fits_capacity(task_util, cpu_cap))
 			return cpu;
@@ -6348,7 +6348,7 @@  static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	 */
 	lockdep_assert_irqs_disabled();
 
-	if ((available_idle_cpu(target) || sched_idle_cpu(target)) &&
+	if ((available_idle_cpu_sched(target) || sched_idle_cpu(target)) &&
 	    asym_fits_capacity(task_util, target))
 		return target;
 
@@ -6356,7 +6356,7 @@  static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	 * If the previous CPU is cache affine and idle, don't be stupid:
 	 */
 	if (prev != target && cpus_share_cache(prev, target) &&
-	    (available_idle_cpu(prev) || sched_idle_cpu(prev)) &&
+	    (available_idle_cpu_sched(prev) || sched_idle_cpu(prev)) &&
 	    asym_fits_capacity(task_util, prev))
 		return prev;
 
@@ -6379,7 +6379,7 @@  static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	if (recent_used_cpu != prev &&
 	    recent_used_cpu != target &&
 	    cpus_share_cache(recent_used_cpu, target) &&
-	    (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) &&
+	    (available_idle_cpu_sched(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) &&
 	    cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr) &&
 	    asym_fits_capacity(task_util, recent_used_cpu)) {
 		/*