diff mbox series

[RFC] KVM: PPC: Book3S HV: Update guest state entry/exit accounting to new API

Message ID 20220223153352.2590602-1-npiggin@gmail.com (mailing list archive)
State New, archived
Headers show
Series [RFC] KVM: PPC: Book3S HV: Update guest state entry/exit accounting to new API | expand

Commit Message

Nicholas Piggin Feb. 23, 2022, 3:33 p.m. UTC
Update the guest state and timing entry/exit accounting to use the new
API, which was introduced following issues found[1]. KVM HV does not
seem to suffer from those issues listed, but it does call srcu inside
the guest context which is fragile at best. The new API allows guest
entry timing to be de-coupled from state entry.

Change to the new API, move the srcu_read_lock/unlock outside the guest
context, move tracing related entry/exit together with the guest state
switches, and extend timing coverage out to include the secondary thread
gathering in the P7/8 path.

[1] https://lore.kernel.org/lkml/20220201132926.3301912-1-mark.rutland@arm.com/

Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
KVM PR and BookE possibly have more issues (like taking host interrupts
in guest context) but look harder to fix.

Thanks,
Nick

 arch/powerpc/kvm/book3s_hv.c | 39 ++++++++++++------------------------
 1 file changed, 13 insertions(+), 26 deletions(-)
diff mbox series

Patch

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 84c89f08ae9a..4f0915509dbb 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3683,6 +3683,8 @@  static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 		return;
 	}
 
+	guest_timing_enter_irqoff();
+
 	kvmppc_clear_host_core(pcpu);
 
 	/* Decide on micro-threading (split-core) mode */
@@ -3812,23 +3814,15 @@  static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	for (sub = 0; sub < core_info.n_subcores; ++sub)
 		spin_unlock(&core_info.vc[sub]->lock);
 
-	guest_enter_irqoff();
-
 	srcu_idx = srcu_read_lock(&vc->kvm->srcu);
 
+	guest_state_enter_irqoff();
 	this_cpu_disable_ftrace();
 
-	/*
-	 * Interrupts will be enabled once we get into the guest,
-	 * so tell lockdep that we're about to enable interrupts.
-	 */
-	trace_hardirqs_on();
-
 	trap = __kvmppc_vcore_entry();
 
-	trace_hardirqs_off();
-
 	this_cpu_enable_ftrace();
+	guest_state_exit_irqoff();
 
 	srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
 
@@ -3863,11 +3857,10 @@  static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 
 	kvmppc_set_host_core(pcpu);
 
-	context_tracking_guest_exit();
 	if (!vtime_accounting_enabled_this_cpu()) {
 		local_irq_enable();
 		/*
-		 * Service IRQs here before vtime_account_guest_exit() so any
+		 * Service IRQs here before guest_timing_exit_irqoff() so any
 		 * ticks that occurred while running the guest are accounted to
 		 * the guest. If vtime accounting is enabled, accounting uses
 		 * TB rather than ticks, so it can be done without enabling
@@ -3876,7 +3869,7 @@  static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 		 */
 		local_irq_disable();
 	}
-	vtime_account_guest_exit();
+	guest_timing_exit_irqoff();
 
 	local_irq_enable();
 
@@ -4520,33 +4513,28 @@  int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
 
 	__kvmppc_create_dtl_entry(vcpu, pcpu, tb + vc->tb_offset, 0);
 
-	trace_kvm_guest_enter(vcpu);
-
-	guest_enter_irqoff();
-
 	srcu_idx = srcu_read_lock(&kvm->srcu);
 
+	trace_kvm_guest_enter(vcpu);
+	guest_timing_enter_irqoff();
+	guest_state_enter_irqoff();
 	this_cpu_disable_ftrace();
 
-	/* Tell lockdep that we're about to enable interrupts */
-	trace_hardirqs_on();
-
 	trap = kvmhv_p9_guest_entry(vcpu, time_limit, lpcr, &tb);
 	vcpu->arch.trap = trap;
 
-	trace_hardirqs_off();
-
 	this_cpu_enable_ftrace();
+	guest_state_exit_irqoff();
+	trace_kvm_guest_exit(vcpu);
 
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
 
 	set_irq_happened(trap);
 
-	context_tracking_guest_exit();
 	if (!vtime_accounting_enabled_this_cpu()) {
 		local_irq_enable();
 		/*
-		 * Service IRQs here before vtime_account_guest_exit() so any
+		 * Service IRQs here before guest_timing_exit_irqoff() so any
 		 * ticks that occurred while running the guest are accounted to
 		 * the guest. If vtime accounting is enabled, accounting uses
 		 * TB rather than ticks, so it can be done without enabling
@@ -4555,7 +4543,7 @@  int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
 		 */
 		local_irq_disable();
 	}
-	vtime_account_guest_exit();
+	guest_timing_exit_irqoff();
 
 	vcpu->cpu = -1;
 	vcpu->arch.thread_cpu = -1;
@@ -4575,7 +4563,6 @@  int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
 			  kvmppc_get_gpr(vcpu, 3) == H_ENTER_NESTED)))
 		kvmppc_core_dequeue_dec(vcpu);
 
-	trace_kvm_guest_exit(vcpu);
 	r = RESUME_GUEST;
 	if (trap) {
 		if (!nested)