
[4/5] KVM: RISC-V: reset VCPU state when becoming runnable

Message ID 20250403112522.1566629-7-rkrcmar@ventanamicro.com (mailing list archive)
State New
Series KVM: RISC-V: VCPU reset fixes

Checks

Context Check Description
bjorn/pre-ci_am success Success
bjorn/build-rv32-defconfig success build-rv32-defconfig
bjorn/build-rv64-clang-allmodconfig success build-rv64-clang-allmodconfig
bjorn/build-rv64-gcc-allmodconfig success build-rv64-gcc-allmodconfig
bjorn/build-rv64-nommu-k210-defconfig success build-rv64-nommu-k210-defconfig
bjorn/build-rv64-nommu-k210-virt success build-rv64-nommu-k210-virt
bjorn/checkpatch fail checkpatch
bjorn/dtb-warn-rv64 success dtb-warn-rv64
bjorn/header-inline success header-inline
bjorn/kdoc success kdoc
bjorn/module-param success module-param
bjorn/verify-fixes success verify-fixes
bjorn/verify-signedoff success verify-signedoff

Commit Message

Radim Krčmář April 3, 2025, 11:25 a.m. UTC
Beware, this patch "breaks" the userspace interface, because it fixes
a KVM/QEMU bug where the boot VCPU is not being reset by KVM.

The VCPU reset paths are inconsistent right now.  KVM resets VCPUs that
are brought up by KVM-accelerated SBI calls, but does nothing for VCPUs
brought up through ioctls.
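
To illustrate the ioctl path in question (a sketch of mine, not code
from this series): a VMM marks the boot VCPU runnable with the
KVM_SET_MP_STATE ioctl, and before this patch no KVM-side reset
happened there.  Assuming vcpu_fd is an open VCPU file descriptor:

  #include <linux/kvm.h>
  #include <sys/ioctl.h>

  static int make_vcpu_runnable(int vcpu_fd)
  {
  	struct kvm_mp_state mp_state = {
  		.mp_state = KVM_MP_STATE_RUNNABLE,
  	};

  	/* Before this patch, this path skipped the KVM reset. */
  	return ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp_state);
  }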

We need to perform a KVM reset even when the VCPU is started through an
ioctl.  This patch is one of the ways we can achieve it.

Assume that userspace has no business setting the post-reset state.
KVM is de facto the SBI implementation: the SBI HSM acceleration cannot
be disabled and userspace cannot control the reset state, so KVM should
be in full control of the post-reset state.

Do not reset the pc and a1 registers, because the SBI reset is expected
to provide them and KVM has no idea what these registers should be;
only userspace knows where it put the data.
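
For reference, in SBI HSM the new hart's pc and a1 come straight from
the guest's sbi_hart_start() call (EID 0x48534D "HSM", FID 0): the
started hart resumes at start_addr with a0 = hartid and a1 = opaque.
A minimal guest-side sketch of that call, per the SBI calling
convention (EID in a7, FID in a6, error/value returned in a0/a1):

  struct sbiret { long error; long value; };

  static struct sbiret sbi_hart_start(unsigned long hartid,
  				      unsigned long start_addr,
  				      unsigned long opaque)
  {
  	register unsigned long a0 asm("a0") = hartid;
  	register unsigned long a1 asm("a1") = start_addr;
  	register unsigned long a2 asm("a2") = opaque;
  	register unsigned long a6 asm("a6") = 0;	/* FID: HART_START */
  	register unsigned long a7 asm("a7") = 0x48534D;	/* EID: "HSM" */

  	/* Trap to the SBI implementation (KVM, here). */
  	asm volatile("ecall"
  		     : "+r"(a0), "+r"(a1)
  		     : "r"(a2), "r"(a6), "r"(a7)
  		     : "memory");

  	return (struct sbiret){ .error = a0, .value = a1 };
  }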

An important consideration is resume: userspace might want to start
with non-reset state.  Check ran_atleast_once to allow this, because
KVM-SBI HSM creates some VCPUs in the STOPPED state.

The drawback is that userspace can still start the boot VCPU with an
incorrect reset state, because KVM has no way to distinguish a freshly
reset new VCPU (where userspace might set some values by mistake) from
a restored VCPU (where userspace must set all values).

The advantage of this solution is that it fixes current QEMU and makes
some sense under the assumption that KVM implements SBI HSM.
I do not like it too much, so I'd be in favor of a different solution
if we can still afford to drop support for current userspaces.

For a cleaner solution, we should add interfaces to perform the KVM-SBI
reset request on userspace demand.  I think it would also be much better
if userspace were in control of the post-reset state.
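
One possible shape for such an interface (purely hypothetical: the
ioctl name, number, and struct below are invented for illustration and
are not part of this series) would let userspace request the KVM-SBI
reset explicitly and supply the post-reset pc and a1:

  /* Hypothetical UAPI, not in the kernel: userspace asks for the
   * KVM-SBI reset instead of KVM inferring it from KVM_SET_MP_STATE.
   */
  struct kvm_riscv_reset_state {
  	__u64 pc;	/* post-reset program counter */
  	__u64 a1;	/* post-reset a1, e.g. devicetree address */
  };

  #define KVM_RISCV_VCPU_RESET \
  	_IOW(KVMIO, 0xff, struct kvm_riscv_reset_state)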

Signed-off-by: Radim Krčmář <rkrcmar@ventanamicro.com>
---
 arch/riscv/include/asm/kvm_host.h     |  1 +
 arch/riscv/include/asm/kvm_vcpu_sbi.h |  3 +++
 arch/riscv/kvm/vcpu.c                 |  9 +++++++++
 arch/riscv/kvm/vcpu_sbi.c             | 21 +++++++++++++++++++--
 4 files changed, 32 insertions(+), 2 deletions(-)

Patch

diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index 0c8c9c05af91..9bbf8c4a286b 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -195,6 +195,7 @@  struct kvm_vcpu_smstateen_csr {
 
 struct kvm_vcpu_reset_state {
 	spinlock_t lock;
+	bool active;
 	unsigned long pc;
 	unsigned long a1;
 };
diff --git a/arch/riscv/include/asm/kvm_vcpu_sbi.h b/arch/riscv/include/asm/kvm_vcpu_sbi.h
index aaaa81355276..2c334a87e02a 100644
--- a/arch/riscv/include/asm/kvm_vcpu_sbi.h
+++ b/arch/riscv/include/asm/kvm_vcpu_sbi.h
@@ -57,6 +57,9 @@  void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,
 				     u32 type, u64 flags);
 void kvm_riscv_vcpu_sbi_request_reset(struct kvm_vcpu *vcpu,
                                       unsigned long pc, unsigned long a1);
+void __kvm_riscv_vcpu_set_reset_state(struct kvm_vcpu *vcpu,
+                                      unsigned long pc, unsigned long a1);
+void kvm_riscv_vcpu_sbi_request_reset_from_userspace(struct kvm_vcpu *vcpu);
 int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_riscv_vcpu_set_reg_sbi_ext(struct kvm_vcpu *vcpu,
 				   const struct kvm_one_reg *reg);
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index b8485c1c1ce4..4578863a39e3 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -58,6 +58,11 @@  static void kvm_riscv_vcpu_context_reset(struct kvm_vcpu *vcpu)
 	struct kvm_vcpu_reset_state *reset_state = &vcpu->arch.reset_state;
 	void *vector_datap = cntx->vector.datap;
 
+	spin_lock(&reset_state->lock);
+	if (!reset_state->active)
+		__kvm_riscv_vcpu_set_reset_state(vcpu, cntx->sepc, cntx->a1);
+	spin_unlock(&reset_state->lock);
+
 	memset(cntx, 0, sizeof(*cntx));
 	memset(csr, 0, sizeof(*csr));
 
@@ -520,6 +525,10 @@  int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 
 	switch (mp_state->mp_state) {
 	case KVM_MP_STATE_RUNNABLE:
+		if (riscv_vcpu_supports_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_HSM) &&
+				vcpu->arch.ran_atleast_once &&
+				kvm_riscv_vcpu_stopped(vcpu))
+			kvm_riscv_vcpu_sbi_request_reset_from_userspace(vcpu);
 		WRITE_ONCE(vcpu->arch.mp_state, *mp_state);
 		break;
 	case KVM_MP_STATE_STOPPED:
diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
index 3d7955e05cc3..77f9f0bd3842 100644
--- a/arch/riscv/kvm/vcpu_sbi.c
+++ b/arch/riscv/kvm/vcpu_sbi.c
@@ -156,12 +156,29 @@  void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,
 	run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
 }
 
+/* must be called with vcpu->arch.reset_state.lock held */
+void __kvm_riscv_vcpu_set_reset_state(struct kvm_vcpu *vcpu,
+                                      unsigned long pc, unsigned long a1)
+{
+	vcpu->arch.reset_state.active = true;
+	vcpu->arch.reset_state.pc = pc;
+	vcpu->arch.reset_state.a1 = a1;
+}
+
 void kvm_riscv_vcpu_sbi_request_reset(struct kvm_vcpu *vcpu,
                                       unsigned long pc, unsigned long a1)
 {
 	spin_lock(&vcpu->arch.reset_state.lock);
-	vcpu->arch.reset_state.pc = pc;
-	vcpu->arch.reset_state.a1 = a1;
+	__kvm_riscv_vcpu_set_reset_state(vcpu, pc, a1);
+	spin_unlock(&vcpu->arch.reset_state.lock);
+
+	kvm_make_request(KVM_REQ_VCPU_RESET, vcpu);
+}
+
+void kvm_riscv_vcpu_sbi_request_reset_from_userspace(struct kvm_vcpu *vcpu)
+{
+	spin_lock(&vcpu->arch.reset_state.lock);
+	vcpu->arch.reset_state.active = false;
 	spin_unlock(&vcpu->arch.reset_state.lock);
 
 	kvm_make_request(KVM_REQ_VCPU_RESET, vcpu);
 }